From 2b6623821fa3a89a001160f7c542ef4383c948a5 Mon Sep 17 00:00:00 2001 From: spaceoddity91719 Date: Tue, 19 Apr 2022 17:06:11 +0800 Subject: [PATCH 1/6] =?UTF-8?q?fix:=E5=BC=80=E5=8F=91=E8=A7=84=E8=8C=83+?= =?UTF-8?q?=E5=AF=BC=E5=85=A5=E5=AF=BC=E5=87=BA=E6=95=B0=E6=8D=AE+?= =?UTF-8?q?=E6=89=8B=E5=8A=A8=E5=AE=89=E8=A3=85=E8=B7=AF=E5=BE=84+3.0=20re?= =?UTF-8?q?lease=20note?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dev/1-development-specifications.md | 2 - .../constraint-design.md | 37 -- .../database-and-schema-design.md | 27 -- .../database-object-design/field-design.md | 40 -- .../database-object-design/table-design.md | 57 --- .../view-and-joined-table-design.md | 19 - .../database-object-naming-conventions.md | 31 -- ...view-of-development-and-design-proposal.md | 15 - .../sql-compilation.md | 143 ------- .../jdbc-configuration.md | 62 --- product/en/docs-mogdb/v2.1/toc.md | 12 - product/en/docs-mogdb/v2.1/toc_dev.md | 12 - .../v3.0/about-mogdb/mogdb-release-notes.md | 380 ++++-------------- ...OPY-FROM-STDIN-statement-to-import-data.md | 2 +- .../dev/1-development-specifications.md | 2 - .../constraint-design.md | 37 -- .../database-and-schema-design.md | 27 -- .../database-object-design/field-design.md | 40 -- .../database-object-design/table-design.md | 57 --- .../view-and-joined-table-design.md | 19 - .../database-object-naming-conventions.md | 31 -- ...view-of-development-and-design-proposal.md | 15 - .../sql-compilation.md | 143 ------- .../jdbc-configuration.md | 62 --- .../installation-guide/manual-installation.md | 3 +- product/en/docs-mogdb/v3.0/toc.md | 12 - product/en/docs-mogdb/v3.0/toc_dev.md | 12 - .../dev/1-development-specifications.md | 68 ++-- .../constraint-design.md | 37 -- .../database-and-schema-design.md | 27 -- .../database-object-design/field-design.md | 40 -- .../database-object-design/table-design.md | 81 ---- .../view-and-joined-table-design.md | 19 - 
.../database-object-naming-conventions.md | 31 -- ...view-of-development-and-design-proposal.md | 15 - .../sql-compilation.md | 143 ------- .../jdbc-configuration.md | 63 --- product/zh/docs-mogdb/v2.1/toc.md | 12 - product/zh/docs-mogdb/v2.1/toc_dev.md | 12 - .../v3.0/about-mogdb/mogdb-release-notes.md | 376 ++++------------- ...OPY-FROM-STDIN-statement-to-import-data.md | 2 +- .../dev/1-development-specifications.md | 68 ++-- .../constraint-design.md | 37 -- .../database-and-schema-design.md | 27 -- .../database-object-design/field-design.md | 40 -- .../database-object-design/table-design.md | 81 ---- .../view-and-joined-table-design.md | 19 - .../database-object-naming-conventions.md | 31 -- ...view-of-development-and-design-proposal.md | 15 - .../sql-compilation.md | 143 ------- .../jdbc-configuration.md | 63 --- .../installation-guide/manual-installation.md | 3 +- product/zh/docs-mogdb/v3.0/toc.md | 12 - product/zh/docs-mogdb/v3.0/toc_dev.md | 12 - 54 files changed, 244 insertions(+), 2532 deletions(-) delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md 
delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md delete mode 100644 product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md delete mode 100644 product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md delete mode 100644 
product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md delete mode 100644 product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md delete mode 100644 product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md delete mode 100644 
product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md diff --git a/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md b/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md index a19bf3de..f91625bf 100644 --- a/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md +++ b/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md @@ -166,8 +166,6 @@ func_addgroup (Add one group) ### Partitioned Table Design -- The partitioned tables supported by MogDB database are range partitioned tables. - - The number of partitioned tables is not recommended to exceed 1000. - The primary key or unique index must contain the partition key. diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md deleted file mode 100644 index d595b4e6..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Constraint Design -summary: Constraint Design -author: Guo Huan -date: 2021-10-14 ---- - -# Constraint Design - -## DEFAULT and NULL Constraints - -- [Proposal] If all the column values can be obtained from services, you are not advised to use the **DEFAULT** constraint. Otherwise, unexpected results will be generated during data loading. -- [Proposal] Add **NOT NULL** constraints to columns that never have NULL values. The optimizer automatically optimizes the columns in certain scenarios. -- [Proposal] Explicitly name all constraints excluding **NOT NULL** and **DEFAULT**. - -## Partial Cluster Keys - -A partial cluster key (PCK) is a local clustering technology used for column-store tables. 
After creating a PCK, you can quickly filter and scan fact tables using min or max sparse indexes in MogDB. Comply with the following rules to create a PCK: - -- [Notice] Only one PCK can be created in a table. A PCK can contain multiple columns, preferably no more than two columns. -- [Proposal] Create a PCK on simple expression filter conditions in a query. Such filter conditions are usually in the form of **col op const**, where **col** specifies a column name, **op** specifies an operator (such as =, >, >=, <=, and <), and **const** specifies a constant. -- [Proposal] If the preceding conditions are met, create a PCK on the column having the most distinct values. - -## Unique Constraints - -- [Notice] Unique constraints can be used in row-store tables and column-store tables. -- [Proposal] The constraint name should indicate that it is a unique constraint, for example, **UNIIncluded columns**. - -## Primary Key Constraints - -- [Notice] Primary key constraints can be used in row-store tables and column-store tables. -- [Proposal] The constraint name should indicate that it is a primary key constraint, for example, **PKIncluded columns**. - -## Check Constraints - -- [Notice] Check constraints can be used in row-store tables but not in column-store tables. -- [Proposal] The constraint name should indicate that it is a check constraint, for example, **CKIncluded columns**. 
diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md deleted file mode 100644 index c72ba82a..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Database and Schema Design -summary: Database and Schema Design -author: Guo Huan -date: 2021-10-14 ---- - -# Database and Schema Design - -In MogDB, services can be isolated by databases and schemas. Databases share little resources and cannot directly access each other. Connections to and permissions on them are also isolated. Schemas share more resources than databases do. User permissions on schemas and subordinate objects can be controlled using the **GRANT** and **REVOKE** syntax. - -- You are advised to use schemas to isolate services for convenience and resource sharing. -- It is recommended that system administrators create schemas and databases and then assign required permissions to users. - -## Database Design - -- [Rule] Create databases as required by your service. Do not use the default **postgres** database of a database instance. -- [Proposal] Create a maximum of three user-defined databases in a database instance. -- [Proposal] To make your database compatible with most characters, you are advised to use the UTF-8 encoding when creating a database. -- [Notice] When you create a database, exercise caution when you set **ENCODING** and **DBCOMPATIBILITY** configuration items. MogDB supports the A, B and PG compatibility modes, which are compatible with the Oracle syntax, MySQL syntax and PostgreSQL syntax, respectively. The syntax behavior varies according to the compatibility mode. By default, the A compatibility mode is used. 
-- [Notice] By default, a database owner has all permissions for all objects in the database, including the deletion permission. Exercise caution when deleting a permission. - -## Schema Design - -- [Notice] To let a user access an object in a schema, assign the usage permission and the permissions for the object to the user, unless the user has the **sysadmin** permission or is the schema owner. -- [Notice] To let a user create an object in the schema, grant the create permission for the schema to the user. -- [Notice] By default, a schema owner has all permissions for all objects in the schema, including the deletion permission. Exercise caution when deleting a permission. diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md deleted file mode 100644 index 1759925c..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Field Design -summary: Field Design -author: Guo Huan -date: 2021-10-14 ---- - -# Field Design - -## Selecting a Data Type - -To improve query efficiency, comply with the following rules when designing columns: - -- [Proposal] Use the most efficient data types allowed. - - If all of the following number types provide the required service precision, they are recommended in descending order of priority: integer, floating point, and numeric. - -- [Proposal] In tables that are logically related, columns having the same meaning should use the same data type. - -- [Proposal] For string data, you are advised to use variable-length strings and specify the maximum length. To avoid truncation, ensure that the specified maximum length is greater than the maximum number of characters to be stored. 
You are not advised to use CHAR(n), BPCHAR(n), NCHAR(n), or CHARACTER(n), unless you know that the string length is fixed. - - For details about string types, see below. - -## Common String Types - -Every column requires a data type suitable for its data characteristics. The following table lists common string types in MogDB. - -**Table 1** Common string types - -| **Name** | **Description** | **Max. Storage Capacity** | -| :------------------- | :----------------------------------------------------------- | :------------------------ | -| CHAR(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| CHARACTER(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| NCHAR(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| BPCHAR(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| VARCHAR(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. | 10 MB | -| CHARACTER VARYING(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. This data type and VARCHAR(n) are different representations of the same data type. | 10 MB | -| VARCHAR2(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. This data type is added to be compatible with the Oracle database, and its behavior is the same as that of VARCHAR(n). 
| 10 MB | -| NVARCHAR2(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. | 10 MB | -| TEXT | Variable-length string. Its maximum length is 1 GB minus 8203 bytes. | 1 GB minus 8203 bytes | diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md deleted file mode 100644 index c8839648..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Table Design -summary: Table Design -author: Guo Huan -date: 2021-10-14 ---- - -# Table Design - -Comply with the following principles to properly design a table: - -- [Notice] Reduce the amount of data to be scanned. You can use the pruning mechanism of a partitioned table. -- [Notice] Minimize random I/Os. By clustering or local clustering, you can sequentially store hot data, converting random I/O to sequential I/O to reduce the cost of I/O scanning. - -## Selecting a Storage Mode - -[Proposal] Selecting a storage model is the first step in defining a table. The storage model mainly depends on the customer's service type. For details, see Table 1. - -**Table 1** Table storage modes and scenarios - -| Storage Type | Application Scenario | -| :----------- | :----------------------------------------------------------- | -| Row store | - Point queries (simple index-based queries that only return a few records).
- Scenarios requiring frequent addition, deletion, and modification. | -| Column store | - Statistical analysis queries (requiring a large number of association and grouping operations).
- Ad hoc queries (using uncertain query conditions and unable to utilize indexes to scan row-store tables). | - -## Selecting a Partitioning Mode - -If a table contains a large amount of data, partition the table based on the following rules: - -- [Proposal] Create partitions on columns that indicate certain ranges, such as dates and regions. -- [Proposal] A partition name should show the data characteristics of a partition. For example, its format can be **Keyword+Range** characteristics. -- [Proposal] Set the upper limit of a partition to **MAXVALUE** to prevent data overflow. - -The example of a partitioned table definition is as follows: - -```sql -CREATE TABLE staffS_p1 -( - staff_ID NUMBER(6) not null, - FIRST_NAME VARCHAR2(20), - LAST_NAME VARCHAR2(25), - EMAIL VARCHAR2(25), - PHONE_NUMBER VARCHAR2(20), - HIRE_DATE DATE, - employment_ID VARCHAR2(10), - SALARY NUMBER(8,2), - COMMISSION_PCT NUMBER(4,2), - MANAGER_ID NUMBER(6), - section_ID NUMBER(4) -) -PARTITION BY RANGE (HIRE_DATE) -( - PARTITION HIRE_19950501 VALUES LESS THAN ('1995-05-01 00:00:00'), - PARTITION HIRE_19950502 VALUES LESS THAN ('1995-05-02 00:00:00'), - PARTITION HIRE_maxvalue VALUES LESS THAN (MAXVALUE) -); -``` diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md deleted file mode 100644 index ea5629a4..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: View and Joined Table Design -summary: View and Joined Table Design -author: Guo Huan -date: 2021-10-14 ---- - -# View and Joined Table Design - -## View Design - -- [Proposal] Do not nest views unless they have strong dependency on each other. 
-- [Proposal] Try to avoid collation operations in a view definition. - -## Joined Table Design - -- [Proposal] Minimize joined columns across tables. -- [Proposal] Use the same data type for joined columns. -- [Proposal] The names of joined columns should indicate their relationship. For example, they can use the same name. diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md deleted file mode 100644 index 1f8e5b2e..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Database Object Naming Conventions -summary: Database Object Naming Conventions -author: Guo Huan -date: 2021-10-14 ---- - -# Database Object Naming Conventions - -The name of a database object must meet the following requirements: The name of a non-time series table ranges from 1 to 63 characters and that of a time series table ranges from 1 to 53 characters. The name must start with a letter or underscore (_), and can contain letters, digits, underscores (_), dollar signs ($), and number signs (#). - -- [Proposal] Do not use reserved or non-reserved keywords to name database objects. - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **NOTE:** You can use the select * from pg_get_keywords() query openGauss keyword or view the keyword in [Keywords](2-keywords). - -- [Proposal] Do not use a string enclosed in double quotation marks ("") to define the database object name, unless you need to specify its capitalization. Case sensitivity of database object names makes problem location difficult. - -- [Proposal] Use the same naming format for database objects. 
- - - In a system undergoing incremental development or service migration, you are advised to comply with its historical naming conventions. - - You are advised to use multiple words separated with underscores (_). - - You are advised to use intelligible names and common acronyms or abbreviations for database objects. Acronyms or abbreviations that are generally understood are recommended. For example, you can use English words or Chinese pinyin indicating actual business terms. The naming format should be consistent within a database instance. - - A variable name must be descriptive and meaningful. It must have a prefix indicating its type. - -- [Proposal] The name of a table object should indicate its main characteristics, for example, whether it is an ordinary, temporary, or unlogged table. - - - An ordinary table name should indicate the business relevant to a dataset. - - Temporary tables are named in the format of **tmp_Suffix**. - - Unlogged tables are named in the format of **ul_Suffix**. - - Foreign tables are named in the format of **f_Suffix**. - - Do not create database objects whose names start with **redis_**. diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md deleted file mode 100644 index 9b98deb8..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Overview of Development and Design Proposal -summary: Overview of Development and Design Proposal -author: Guo Huan -date: 2021-10-14 ---- - -# Overview of Development and Design Proposal - -This section describes the design specifications for database modeling and application development. 
Modeling based on these specifications can better fit the distributed processing architecture of MogDB and output more efficient service SQL code. - -The meaning of "Proposal" and "Notice" in this section is as follows: - -- **Proposal**: Design rules. Services complying with the rules can run efficiently, and those violating the rules may have low performance or logic errors. -- **Notice**: Details requiring attention during service development. This term identifies SQL behavior that complies with SQL standards but users may have misconceptions about, and default behavior that users may be unaware of in a program. diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md deleted file mode 100644 index 5935dd14..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: SQL Compilation -summary: SQL Compilation -author: Guo Huan -date: 2021-10-14 ---- - -# SQL Compilation - -## DDL - -- [Proposal] In MogDB, you are advised to execute DDL operations, such as creating table or making comments, separately from batch processing jobs to avoid performance deterioration caused by many concurrent transactions. -- [Proposal] Execute data truncation after unlogged tables are used because MogDB cannot ensure the security of unlogged tables in abnormal scenarios. -- [Proposal] Suggestions on the storage mode of temporary and unlogged tables are the same as those on base tables. Create temporary tables in the same storage mode as the base tables to avoid high computing costs caused by hybrid row and column correlation. -- [Proposal] The total length of an index column cannot exceed 50 bytes. Otherwise, the index size will increase greatly, resulting in large storage cost and low index performance. 
-- [Proposal] Do not delete objects using **DROP…CASCADE**, unless the dependency between objects is specified. Otherwise, the objects may be deleted by mistake. - -## Data Loading and Uninstalling - -- [Proposal] Provide the inserted column list in the insert statement. Example: - - ```sql - INSERT INTO task(name,id,comment) VALUES ('task1','100','100th task'); - ``` - -- [Proposal] After data is imported to the database in batches or the data increment reaches the threshold, you are advised to analyze tables to prevent the execution plan from being degraded due to inaccurate statistics. - -- [Proposal] To clear all data in a table, you are advised to use **TRUNCATE TABLE** instead of **DELETE TABLE**. **DELETE TABLE** is not efficient and cannot release disk space occupied by the deleted data. - -## Type Conversion - -- [Proposal] Convert data types explicitly. If you perform implicit conversion, the result may differ from expected. -- [Proposal] During data query, explicitly specify the data type for constants, and do not attempt to perform any implicit data type conversion. -- [Notice] If **sql_compatibility** is set to **A**, null strings will be automatically converted to **NULL** during data import. If null strings need to be reserved, set **sql_compatibility** to **C**. - -## Query Operation - -- [Proposal] Do not return a large number of result sets to a client except the ETL program. If a large result set is returned, consider modifying your service design. - -- [Proposal] Perform DDL and DML operations encapsulated in transactions. Operations like table truncation, update, deletion, and dropping, cannot be rolled back once committed. You are advised to encapsulate such operations in transactions so that you can roll back the operations if necessary. - -- [Proposal] During query compilation, you are advised to list all columns to be queried and avoid using **SELECT \***. 
Doing so reduces output lines, improves query performance, and avoids the impact of adding or deleting columns on front-end service compatibility. - -- [Proposal] During table object access, add the schema prefix to the table object to avoid accessing an unexpected table due to schema switchover. - -- [Proposal] The cost of joining more than three tables or views, especially full joins, is difficult to be estimated. You are advised to use the **WITH TABLE AS** statement to create interim tables to improve the readability of SQL statements. - -- [Proposal] Avoid using Cartesian products or full joins. Cartesian products and full joins will result in a sharp expansion of result sets and poor performance. - -- [Notice] Only **IS NULL** and **IS NOT NULL** can be used to determine NULL value comparison results. If any other method is used, **NULL** is returned. For example, **NULL** instead of expected Boolean values is returned for **NULL<>NULL**, **NULL=NULL**, and **NULL<>1**. - -- [Notice] Do not use **count(col)** instead of **count(\*)** to count the total number of records in a table. **count(\*)** counts the NULL value (actual rows) while **count(col)** does not. - -- [Notice] While executing **count(col)**, the number of NULL record rows is counted as 0. While executing **sum(col)**, **NULL** is returned if all records are NULL. If not all the records are NULL, the number of NULL record rows is counted as 0. - -- [Notice] To count multiple columns using **count()**, column names must be enclosed in parentheses. For example, count ((col1, col2, col3)). Note: When multiple columns are used to count the number of NULL record rows, a row is counted even if all the selected columns are NULL. The result is the same as that when **count(\*)** is executed. - -- [Notice] NULL records are not counted when **count(distinct col)** is used to calculate the number of non-NULL columns that are not repeated. 
- -- [Notice] If all statistical columns are NULL when **count(distinct (col1,col2,…))** is used to count the number of unique values in multiple columns, NULL records are also counted, and the records are considered the same. - -- [Proposal] Use the connection operator || to replace the **concat** function for string connection because the execution plan generated by the **concat** function cannot be pushed down to disks. As a result, the query performance severely deteriorates. - -- [Proposal] Use the following time-related macros to replace the **now** function and obtain the current time because the execution plan generated by the **now** function cannot be pushed down to disks. As a result, the query performance severely deteriorates. - - **Table 1** Time-related macros - - | **Macro Name** | **Description** | **Example** | - | :------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | - | CURRENT_DATE | Obtains the current date, excluding the hour, minute, and second details. | `mogdb=# select CURRENT_DATE; date ----- 2018-02-02 (1 row)` | - | CURRENT_TIME | Obtains the current time, excluding the year, month, and day. | `mogdb=# select CURRENT_TIME; timetz -------- 00:39:34.633938+08 (1 row)` | - | CURRENT_TIMESTAMP(n) | Obtains the current date and time, including year, month, day, hour, minute, and second.
NOTE:
**n** indicates the number of digits after the decimal point in the time string. | `mogdb=# select CURRENT_TIMESTAMP(6); timestamptz ----------- 2018-02-02 00:39:55.231689+08 (1 row)` | - -- [Proposal] Do not use scalar subquery statements. A scalar subquery appears in the output list of a SELECT statement. In the following example, the underlined part is a scalar subquery statement: - - ```sql - SELECT id, (SELECT COUNT(*) FROM films f WHERE f.did = s.id) FROM staffs_p1 s; - ``` - - Scalar subqueries often result in query performance deterioration. During application development, scalar subqueries need to be converted into equivalent table associations based on the service logic. - -- [Proposal] In **WHERE** clauses, the filter conditions should be collated. The condition that few records are selected for reading (the number of filtered records is small) is listed at the beginning. - -- [Proposal] Filter conditions in **WHERE** clauses should comply with unilateral rules, that is, to place the column name on one side of a comparison operator. In this way, the optimizer automatically performs pruning optimization in some scenarios. Filter conditions in a **WHERE** clause will be displayed in **col op expression** format, where **col** indicates a table column, **op** indicates a comparison operator, such as = and >, and **expression** indicates an expression that does not contain a column name. Example: - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data WHERE current_timestamp(6) - time < '1 days'::interval; - ``` - - The modification is as follows: - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data where time > current_timestamp(6) - '1 days'::interval; - ``` - -- [Proposal] Do not perform unnecessary collation operations. Collation requires a large amount of memory and CPU. If service logic permits, **ORDER BY** and **LIMIT** can be combined to reduce resource overheads. 
By default, MogDB perform collation by ASC & NULL LAST. - -- [Proposal] When the **ORDER BY** clause is used for collation, specify collation modes (ASC or DESC), and use NULL FIRST or NULL LAST for NULL record sorting. - -- [Proposal] Do not rely on only the **LIMIT** clause to return the result set displayed in a specific sequence. Combine **ORDER BY** and **LIMIT** clauses for some specific result sets and use **OFFSET** to skip specific results if necessary. - -- [Proposal] If the service logic is accurate, you are advised to use **UNION ALL** instead of **UNION**. - -- [Proposal] If a filter condition contains only an **OR** expression, convert the **OR** expression to **UNION ALL** to improve performance. SQL statements that use **OR** expressions cannot be optimized, resulting in slow execution. Example: - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) OR (cdp= 301 AND inline=302) OR (cdp= 302 ANDinline=301); - ``` - - Convert the statement to the following: - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) - union all - SELECT * FROM scdc.pub_menu - WHERE (cdp= 301 AND inline=302) - union all - SELECT * FROM tablename - WHERE (cdp= 302 AND inline=301) - ``` - -- [Proposal] If an **in(val1, val2, val3…)** expression contains a large number of columns, you are advised to replace it with the **in (values (va11), (val2),(val3)…)** statement. The optimizer will automatically convert the **IN** constraint into a non-correlated subquery to improve the query performance. - -- [Proposal] Replace **(not) in** with **(not) exist** when associated columns do not contain **NULL** values. For example, in the following query statement, if the **T1.C1** column does not contain any **NULL** value, add the **NOT NULL** constraint to the **T1.C1** column, and then rewrite the statements. 
- - ```sql - SELECT * FROM T1 WHERE T1.C1 NOT IN (SELECT T2.C2 FROM T2); - ``` - - Rewrite the statement as follows: - - ```sql - SELECT * FROM T1 WHERE NOT EXISTS (SELECT * FROM T1,T2 WHERE T1.C1=T2.C2); - ``` - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **NOTE:** - > - > - If the value of the T1.C1 column is not **NOT NULL**, the preceding rewriting cannot be performed. - > - If the **T1.C1** column is the output of a subquery, check whether the output is **NOT NULL** based on the service logic. - -- [Proposal] Use cursors instead of the **LIMIT OFFSET** syntax to perform pagination queries to avoid resource overheads caused by multiple executions. A cursor must be used in a transaction, and you must disable the cursor and commit the transaction once the query is finished. diff --git a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md b/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md deleted file mode 100644 index cd91b5d4..00000000 --- a/product/en/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: JDBC Configuration -summary: JDBC Configuration -author: Guo Huan -date: 2021-10-14 ---- - -# JDBC Configuration - -Currently, third-party tools related to MogDB are connected through JDBC. This section describes the precautions for configuring the tool. - -## Connection Parameters - -- [Notice] When a third-party tool connects to MogDB through JDBC, JDBC sends a connection request to MogDB. By default, the following configuration parameters are added. For details, see the implementation of the ConnectionFactoryImpl class in the JDBC code. 
- - ``` - params = { - { "user", user }, - { "database", database }, - { "client_encoding", "UTF8" }, - { "DateStyle", "ISO" }, - { "extra_float_digits", "3" }, - { "TimeZone", createPostgresTimeZone() }, - }; - ``` - - These parameters may cause the JDBC and **gsql** clients to display inconsistent data, for example, date data display mode, floating point precision representation, and timezone. - - If the result is not as expected, you are advised to explicitly set these parameters in the Java connection setting. - -- [Proposal] When connecting to the database through JDBC, ensure that the following three time zones are the same: - - - Time zone of the host where the JDBC client is located - - - Time zone of the host where the MogDB database instance is located. - - - Time zone used during MogDB database instance configuration. - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **NOTE:** For details about how to set the time zone, see "[Setting the Time Zone and Time](3-modifying-os-configuration#setting-the-time-zone-and-time)" in *Installation Guide*. - -## fetchsize - -[Notice] To use **fetchsize** in applications, disable **autocommit**. Enabling the **autocommit** switch makes the **fetchsize** configuration invalid. - -## autocommit - -[Proposal] You are advised to enable **autocommit** in the code for connecting to MogDB by the JDBC. If **autocommit** needs to be disabled to improve performance or for other purposes, applications need to ensure their transactions are committed. For example, explicitly commit translations after specifying service SQL statements. Particularly, ensure that all transactions are committed before the client exits. - -## Connection Releasing - -[Proposal] You are advised to use connection pools to limit the number of connections from applications. Do not connect to a database every time you run an SQL statement. 
- -[Proposal] After an application completes its tasks, disconnect its connection to MogDB to release occupied resources. You are advised to set the session timeout interval in the jobs. - -[Proposal] Reset the session environment before releasing connections to the JDBC connection tool. Otherwise, historical session information may cause object conflicts. - -- If GUC parameters are set in the connection, run **SET SESSION AUTHORIZATION DEFAULT;RESET ALL;** to clear the connection status before you return the connection to the connection pool. -- If a temporary table is used, delete the temporary table before you return the connection to the connection pool. - -## CopyManager - -[Proposal] In the scenario where the ETL tool is not used and real-time data import is required, it is recommended that you use the **CopyManager** interface driven by the MogDB JDBC to import data in batches during application development. diff --git a/product/en/docs-mogdb/v2.1/toc.md b/product/en/docs-mogdb/v2.1/toc.md index ac08c7a9..33640dfc 100644 --- a/product/en/docs-mogdb/v2.1/toc.md +++ b/product/en/docs-mogdb/v2.1/toc.md @@ -191,18 +191,6 @@ + [WDR Snapshot Schema](/performance-tuning/wdr-snapshot-schema.md) + [TPCC Performance Tuning Guide](/performance-tuning/TPCC-performance-tuning-guide.md) + Developer Guide - + Development and Design Proposal - + [Overview](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [Database Object Naming Conventions](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + Database Object Design - + [Database and Schema Design](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [Table Design](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [Field Design](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [Constraint 
Design](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [View and Joined Table Design](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + Tool Interconnection - + [JDBC Configuration](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL Compilation](/developer-guide/development-and-design-proposal/sql-compilation.md) + Application Development Guide + [Development Specifications](/developer-guide/dev/1-development-specifications.md) + Development Based on JDBC diff --git a/product/en/docs-mogdb/v2.1/toc_dev.md b/product/en/docs-mogdb/v2.1/toc_dev.md index 8ff0517b..f6ee35c5 100644 --- a/product/en/docs-mogdb/v2.1/toc_dev.md +++ b/product/en/docs-mogdb/v2.1/toc_dev.md @@ -4,18 +4,6 @@ ## Developer Guide -+ Development and Design Proposal - + [Overview](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [Database Object Naming Conventions](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + Database Object Design - + [Database and Schema Design](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [Table Design](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [Field Design](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [Constraint Design](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [View and Joined Table Design](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + Tool Interconnection - + [JDBC Configuration](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL 
Compilation](/developer-guide/development-and-design-proposal/sql-compilation.md) + Application Development Guide + [Development Specifications](/developer-guide/dev/1-development-specifications.md) + Development Based on JDBC diff --git a/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md b/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md index dfcd9a08..b883c497 100644 --- a/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md +++ b/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md @@ -1,345 +1,135 @@ --- -title: MogDB 2.1 Release Notes -summary: MogDB 2.1 Release Notes +title: MogDB 3.0 Release Notes +summary: MogDB 3.0 Release Notes author: Guo Huan -date: 2021-12-06 +date: 2022-04-19 --- -# MogDB 2.1 Release Notes +# MogDB 3.0 Release Notes ## 1. Version Description -MogDB version 2.1 is further enhanced based on MogDB version 2.0 and incorporates the new features of openGauss 2.1.0. +MogDB version 3.0 is further enhanced based on MogDB version 2.1 and incorporates the new features of openGauss 3.0.0. -> Note: MogDB 2.1 is the Preview version, and the life cycle of this version is half a year. +
## 2. New Features -### 2.1 Incorporate new features of openGauss 2.1.0 - -- The stored procedure compatibility is enhanced. -- The SQL engine capability is enhanced. -- The Ustore storage engine is supported. -- Segment-page storage is supported. -- High availability is based on the Paxos distributed consistency protocol. -- AI4DB and DB4AI competitiveness is continuously built. -- The log framework and error codes are modified. -- JDBC client load is balanced and read and write are isolated. -- The CMake script compilation is supported. -- The column-store table supports the primary key constraint and unique key constraint. -- The jsonb data type is supported. -- Automatic elimination of unique SQL statements is supported. -- The UCE fault detection is supported. -- The GB18030 character set is supported. -- The standby server catch is optimized. -- The client tool gsql supports automatic supplement of the readline command. -- The dynamic data masking is supported. -- The State Cryptography Administration (SCA) algorithms are supported. -- The tamper-proof ledger database is supported. -- The built-in role and permission management mechanism is supported. -- The transparent encryption is supported. -- The fully-encrypted database is enhanced. -- The dblink is supported. -- The Ubuntu system is supported. -- The hash index is supported. -- UPSERT supports subqueries. -- The MIN/MAX function supports the IP address type. -- The array_remove, array_replace, first, and last functions are added. -- The Data Studio client tool adapts the kernel features. - -### 2.2 Performance Optimization for x86 Architecture - -Optimize the multi-core performance on x86 architecture. The performance of TPC-C under high concurrency is 1.5-5 times that of PostgreSQL 14. 
The main optimization points are: - -- Support NUMA binding -- Unlocked WAL -- Cache friendly data structure - -### 2.3 Create and Rebuild Indexes Concurrently - -Supports specifying the CONCURRENTLY option when executing create index and reindex index to create and rebuild indexes without blocking the execution of DML statements, improving index maintainability. Supports creating and rebuilding of indexes on ordinary tables and global indexes on partitioned tables concurrently. - -Compared with ordinary index creation and rebuilding, creating and rebuilding concurrently may take longer to complete. - -Indexes on column-store tables, local indexes on partitioned tables, and indexes on temporary tables do not support concurrent index creation and rebuilding. - -**Related Topics** - -- [CREATE INDEX](50-CREATE-INDEX#CONCURRENTLY) -- [REINDEX](117-REINDEX#CONCURRENTLY) - -### 2.4 Enhanced Oracle compatibility - -#### 2.4.1 Support for Orafce plugin - -> Note: Users need to download the plugin package and install it manually. 
- -By integrating the orafce plugin, the following Oracle compatible syntax is supported: - -- SQL Queries - - DUAL table -- SQL Functions - - Mathematical functions - - BITAND - - COSH - - SINH - - TANH - - String functions - - INSTR - - LENGTH - - LENGTHB - - LPAD - - LTRIM - - NLSSORT - - REGEXP_COUNT - - REGEXP_INSTR - - REGEXP_LIKE - - REGEXP_SUBSTR - - REGEXP_REPLACE - - RPAD - - RTRIM - - SUBSTR - - SUBSTRB - - Date/time functions - - ADD_MONTHS - - DBTIMEZONE - - LAST_DAY - - MONTHS_BETWEEN - - NEXT_DAY - - ROUND - - SESSIONTIMEZONE - - SYSDATE - - TRUNC - - Data type formatting functions - - TO_CHAR - - TO_DATE - - TO_MULTI_BYTE - - TO_NUMBER - - TO_SINGLE_BYTE - - Conditional expressions - - DECODE - - LNNVL - - NANVL - - NVL - - NVL2 - - Aggregate functions - - LISTAGG - - MEDIAN - - Functions that return internal information - - DUMP -- SQL Operators - - Datetime operator -- Packages - - DBMS_ALERT - - DBMS_ASSERT - - DBMS_OUTPUT - - DBMS_PIPE - - DBMS_RANDOM - - DBMS_UTILITY - - UTL_FILE - -**Related Topics** - -- [orafce](orafce-user-guide) - -#### 2.4.2 Support CONNECT BY Syntax - -Provide Oracle-compatible **connect by** syntax, implement level data query control, and display levels, loops, starting levels, etc. - -Provides an oracle-compatible level query function, which can display data content, data levels, paths, etc. in a tree-like structure according to the specified connection relationship, starting conditions, etc. - -Specify the root row of the level query through the start with condition, and perform a recursive query based on these rows to obtain all sub-rows, sub-rows of sub-rows, etc. - -The relationship between the parent row and the child row between the levels is specified by the connect by condition to determine all the child rows of each row that meet the condition. 
- -If there is a connection, whether it is a connection statement, or in the from or where clause, the result set after the connection is obtained first, and then the level query is performed. - -If there is a where filter condition in the statement, execute the level query first and then filter the result set, instead of filtering out unsatisfied rows and all its sub-rows. - -You can view the level of the row through the level pseudo column, **sys_connect_by_path** to view the path from the root row to the row, and **connect_by_root** to view auxiliary functions such as the root row. - -**Related Topics** - -- [CONNECT BY](139-CONNECT-BY) - -#### 2.4.3 Updatable View - -Supports updatable views. Users can perform Insert/Update/Delete operations on the view, and the update operation will directly affect the base table corresponding to the view. - -Not all views can be updated. There must be a one-to-one correspondence between the rows in the view and the rows in the base table, that is, the content of the view cannot be created based on aggregates or window functions. - -For a view connected by multiple tables, if the primary key (unique key) of a base table can be used as the primary key (unique key) of the view, the view also supports updating, and the update result applies to the base table from which the primary key is derived. - -**Related Topics** - -- [Updatable-views Supported](overview-of-system-catalogs-and-system-views#updatable-views-supported) - -#### 2.4.4 Alter Columns When Rebuilding View - -When the view is rebuilt, it supports the operations of reducing columns and changing column names. This command is only valid for non-materialized views. - -**Related Topics** - -- [CREATE VIEW](70-CREATE-VIEW#replace) - -#### 2.4.5 Support systimestamp Function - -Returns the current system date and time of the server where the database is located, as well as time zone information. 
+### 2.1 Incorporate new features of openGauss 3.0.0 -**Related Topics** +- Row-store execution to vectorized execution +- Delay of entering the maximum availability mode +- Parallel logical decoding +- Cluster Manager (CM) +- global syscache +- Publication-Subscription +- Foreign key lock enhancement +- Row-store table compression +- Open-source Data Studio +- MySQL to openGauss migration tool Chameleon +- Using ShardingSphere to build a distributed database +- Deploying a distributed database using Kubernetes +- Support ANY permission management +- DBMind componentized +- Database AI algorithms support XGBoost, multiclass and PCA -- [Date and Time Processing Functions and Operators](8-date-and-time-processing-functions-and-operators#systimestamp) +### 2.2 Cluster Manager (CM) -#### 2.4.6 Support sys_guid Function +- Provides the ability to monitor the status of primary and standby nodes, network communication failure monitoring, and file system failure monitoring. +- Provides automatic primary and standby switching capability in case of failure. +- Majority voting using the Paxos algorithm to select the master. +- Require at least three servers to have CM components installed. +- The database servers can be one master and one standby. -The system generates and returns a 16-byte globally unique identifier based on the current time and machine code. +### 2.3 Performance Enhancement -**Related Topics** +#### 2.3.1 Transaction asynchronous commit -- [System Information Functions](23-system-information-functions#sys_guid) +- Split transaction execution and transaction log drop into two phases, CPU bound and IO bound, which are executed by different threads to avoid idle CPU resources when executing IO operations, thus improving CPU resource utilization. +- Optimization of transaction asynchronous commits, which can improve transaction throughput by 20% to 50% and TPCC overall performance by 10% to 20%. 
-### 2.5 Support PostgreSQL Plugins +#### 2.3.2 Log persistence optimization -> Note: Users need to download the plugin package and install it manually. +- Improve execution performance under high data update load and reduce execution latency. -- [pg_repack](pg_repack-user-guide): Through the trigger mechanism, it provides the function of rebuilding the table online, which is mainly used to reduce the size of the free space in the table online. -- [wal2json](wal2json-user-guide): Through the logical replication mechanism, continuous data changes are provided in the form of json, which are mainly used for heterogeneous replication and other situations. -- [pg_trgm](pg_trgm-user-guide): Implement the trgm word segmentation algorithm to achieve better full-text retrieval capabilities. -- [pg_prewarm](pg_prewarm-user-guide): Pre-cache the specified data table in shared memory to speed up data access. -- [pg_bulkload](pg_bulkload-user-guide): The data is directly loaded into the data file without going through the shared memory, which speeds up the batch import of the database. +#### 2.3.3 Index Parallel Creation Parallelism Definition -### 2.6 Support Read Extensibility +- MogDB provides additional parameters to control the parallelism, so you can set the parallelism manually, which is more flexible. -> Note: Comes with ShardingSphere 5.1.0 and later versions, which need to be downloaded and installed manually by the user. +#### 2.3.4 COPY import SIMD acceleration -MogDB supports read extensibility by integrating with ShardingSphere's Proxy. 
+- Speeds up the data parsing phase of the COPY command by taking advantage of the CPU's instruction set, thus improving COPY import performance; (x86 CPUs only at this time) -- Read and write transactions are automatically routed to the primary library for execution, and read-only transactions are automatically routed to the backup library for execution; in scenarios with higher read consistency requirements, read-only transactions can also be routed to the primary library for execution through hint control. +#### 2.3.5 Dynamic partition reduction -- Support for automatic identification and configuration of read and write nodes, without the need to configure primary and secondary roles, and automatic discovery of the primary and secondary libraries in the configuration list. +- Added support for dynamic partition reductions. In the prepare-execute execution method and in scenarios where partition constraint expressions contain subqueries, partitions are trimmed during the execution phase based on parameters or subquery results to improve partition table query performance. -- Support for automatic identification of primary and backup roles after switching, with no additional operation required to automatically identify the new primary and backup roles and route them correctly. +### 2.4 Troubleshooting -- Support automatic load balancing of backup nodes: when the backup library is down and recovered or when a new backup library is added, it will be automatically added to the read load balancing after the replication status of the backup library is normal. 
+#### 2.4.1 Monitor Session level SQL run status -### 2.7 Others +- Collects execution plan trees and dynamically samples execution arithmetic for session-level SQL run states -- The nlssort function supports sorting by pinyin for the GBK character set of rare characters +#### 2.4.2 Enhanced OM troubleshooting capability - **Related Topics**: [SELECT](125-SELECT#nlssort) +- gstrace enhancements: get more targeted execution paths by adding a **component switch**, which is used to improve debug efficiency. +- gs_check enhancement: the original scene check based on the implementation of the test results to save, as well as the difference between the two test results done at different times to compare. +- gs_watch: When MogDB fails, use this tool to collect OS information, log information and configuration files to locate the problem. +- gs_gucquery: realize MogDB GUC value automatic collection, collation, export and difference comparison. -- ALTER SEQUENCE supports modification of increment +### 2.5 Compatibility Enhancement - **Related Topics**: [ALTER SEQUENCE](16-ALTER-SEQUENCE#increment) +#### 2.5.1 Oracle Compatibility Enhancement -- For TIMESTAMP WITH TIME ZONE type, you can use TZH, TZM, TZD, TZR parameters in TO_CHAR to output time zone information +- More function support, more built-in package support: dbms_random, dbms_lob, dbms_metadata, etc. +- Support **connect by** syntax +- Reduce the amount of code modifications required to migrate Oracle applications to MogDB. - **Related Topics**: [Type Conversion Functions](9-type-conversion-functions#to_char) +#### 2.5.2 MySQL Compatibility Enhancement -### 2.8 Preview Features +- More syntax support: timestamp on update, etc.; more data type compatibility; more function compatibility +- Reduce the amount of code modifications required to migrate MySQL applications to MogDB. -> Note: Preview features need to be enabled manually. 
-> -> ```sql -> alter system set enable_poc_feature = on; -> -- or -> alter system set enable_poc_feature to on; -> -- Or add ‘enable_poc_feature = on’ to the postgresql.conf file in the MogDB data directory -> -- Take effect after restart -> ``` +#### 2.5.3 PostgreSQL Compatibility Enhancement -#### 2.8.1 Row-store Table Compression +##### 2.5.3.1 Added BRIN INDEX (supported since PostgreSQL 9.5) -Supports specifying whether a row-store table (astore) is a compressed table when it is created. For a compressed row-store table, the system compresses the table data automatically to save storage space. When writing data to the compressed table, the system automatically selects the appropriate compression algorithm according to the characteristics of each column, and the user can also specify the compression algorithm used for each column directly. +- Block-wide indexes, compared to precise BTREE indexes, BRIN INDEX provides a balance of a relatively fast query speed with less space consumption +- 1GB table, no index, query single 4s; BTREE index 200MB space, query 4ms; BRIN index 800K, query 58ms. -There is a strong correlation between the actual compression ratio and the data content, and the compression ratio can reach 50% in the typical scenario, and the performance loss is less than 5% in the typical TPC-C model, the actual performance impact depends on the actual system load. +##### 2.5.3.2 Added BLOOM INDEX (supported since PostgreSQL 9.6) -For the non-compressed table, you can also use `Alter Table` to change the table to a compressed table, subsequent new write data will be automatically compressed. 
+- Bloom filtering: true is not necessarily true, false must be false; there is a miscalculation rate, need to recheck (algorithm implementation, not to user recheck) +- For tables with a large number of fields and a combination of query conditions that may also use a large number of fields; only equals queries are supported +- Ordinary indexes for such scenarios require the creation of multiple indexes, which can have a significant impact on space usage and insertion and update speed +- At this point, you can create a BLOOM index uniformly on all these fields that may be used for query, to obtain a balance of space and query speed, 10GB table scan can be completed in about 1s -**Related Topics** - -- [CREATE TABLE](60-CREATE-TABLE#COMPRESSION) -- [ALTER TABLE](22-ALTER-TABLE#COMPRESS) - -#### 2.8.2 SubPartition - -Support to create subpartition table, data automatically partition storage according to the partition mode, to improve the storage and query efficiency of large data volumes. The supported subpartition combinations include: - -- List-List -- List-Range -- List-Hash -- Range-List -- Range-Range -- Range-Hash - -Support querying a single Partition and SubPartition; - -Supports partition pruning for Partition Key, SubPartition Key or their combined conditions to further optimize partition query efficiency; - -Supports truncate and vacuum operations on partition tables or first-level partitions; - -During Update operation, data movement across partitions is supported (Partition/SubPartition Key is not supported as List or Hash partition type); - -Backup and restore of subpartition are supported. - -**Related Topics** - -- [CREATE TABLE SUBPARTITION](62.1-CREATE-TABLE-SUBPARTITION) -- [ALTER TABLE SUBPARTITION](23.1-ALTER-TABLE-SUBPARTITION) +##### 2.5.3.3 Reduces the difficulty of migrating PostgreSQL applications that use such indexes to MogDB
## 3. Modified Defects -### 3.1 Incorporate openGauss 2.1.0 Modified Defects - -- [I435UP](https://gitee.com/opengauss/openGauss-server/issues/I435UP) An error is reported when the EXPLAIN statement is executed. -- [I44QS6](https://gitee.com/opengauss/openGauss-server/issues/I44QS6) When the **select get_local_active_session() limit 1 ;** function is executed, the database breaks down. -- [I4566H](https://gitee.com/opengauss/openGauss-server/issues/I4566H) After UPDATE GLOBAL INDEX is performed on a partition of a partitioned table, the query result is inconsistent with the master version. -- [I45822](https://gitee.com/opengauss/openGauss-server/issues/I45822) An error occurs when the GPC global plan cache information is queried in the global temporary table. -- [I442TY](https://gitee.com/opengauss/openGauss-server/issues/I442TY) Failed to recover to the timestamp specified by PITR. -- [I45T7A](https://gitee.com/opengauss/openGauss-server/issues/I45T7A) Remote backup is abnormal when the database is installed in environment variable separation mode. -- [I464G5](https://gitee.com/opengauss/openGauss-server/issues/I464G5) Failed to use **gs_ctl build** to rebuild a specified non-instance directory on a standby node. The error information is inconsistent. -- [I45TTB](https://gitee.com/opengauss/openGauss-server/issues/I45TTB) The foreign table is successfully created for the file type that is not supported by file_fdw, but no error is reported. -- [I491CN](https://gitee.com/opengauss/openGauss-server/issues/I491CN) When the subnet mask of the network address of the cidr type is 32, an error is reported when the MAX function is called. -- [I496VN](https://gitee.com/opengauss/openGauss-server/issues/I496VN) After a large number of Xlogs are stacked on the standby node, the archiving address is corrected. As a result, the archiving fails. 
-- [I49HRV](https://gitee.com/opengauss/openGauss-server/issues/I49HRV) When the standby node archiving is enabled, the standby node archiving is slow. After the switchover, the new primary node is abnormal. -- [I492W4](https://gitee.com/opengauss/openGauss-server/issues/I492W4) When operations related to the mysql_fdw and oracle_fdw foreign tables are performed on the database installed using the OM, a core dump occurs in the database. -- [I498QT](https://gitee.com/opengauss/openGauss-server/issues/I498QT) In the maximum availability mode, when the synchronous standby parameter is ANY2 and the primary server is under continuous pressure, running the **kill-9** command to stop one synchronous standby server causes transaction congestion on the primary server for 2s. -- [I49L15](https://gitee.com/opengauss/openGauss-server/issues/I49L15) Two standby nodes are enabled for archiving. After one node is scaled in and out, the archiving of the other node is abnormal. -- [I43MTG](https://gitee.com/opengauss/openGauss-server/issues/I43MTG) The developer guide does not contain information related to new functions. -- [I42YW8](https://gitee.com/opengauss/openGauss-server/issues/I42YW8) The UPSERT subquery information is not supplemented. -- [I45WDH](https://gitee.com/opengauss/openGauss-server/issues/I45WDH) file_fdw does not support the fixed format. The related description needs to be deleted from the developer guide. -- [I484J0](https://gitee.com/opengauss/openGauss-server/issues/I484J0) The **gs_initdb -T** parameter is not verified, and the value is incorrect after being set according to the guide. -- [I471CS](https://gitee.com/opengauss/openGauss-server/issues/I471CS) When **pgxc_node_name** contains hyphens (-), the database exits abnormally. If residual temporary tables are not cleared, automatic clearance and vacuum cannot be performed. 
-- [I40QM1](https://gitee.com/opengauss/openGauss-server/issues/I40QM1) When gs_basebackup is executed, an exception occurs on the standby node. As a result, the gs_basebackup process is blocked and cannot exit. -- [I3RTQK](https://gitee.com/opengauss/openGauss-server/issues/I3RTQK) The standby node fails to be backed up using gs_basebackup, and the message "could not fetch mot checkpoint info:, status:7" is displayed. - -### 3.2 MogDB 2.1.0 Modified Defects +### 3.1 Incorporate openGauss 3.0.0 Modified Defects + +- [I4VUXG](https://gitee.com/opengauss/openGauss-server/issues/I4VUXG?from=project-issue): Fixed the data loss issue of unlogged tables. +- [I4SF5P](https://gitee.com/opengauss/openGauss-server/issues/I4SF5P?from=project-issue): Fixed the core dump issue that occurred by running create extension dblink after the database is compiled and installed in the release version, and the dblink module is compiled and installed. +- [I4S74D](https://gitee.com/opengauss/openGauss-server/issues/I4S74D?from=project-issue): Fixed the issue of failing to insert data (5/5) into a row-store compressed table using Jmeter when the data volume is greater than 1 GB. The compression type is set to **2**. +- [I4N81J](https://gitee.com/opengauss/openGauss-server/issues/I4N81J?from=project-issue): Fixed the issue of failing to synchronize the UPDATE and DELETE operations to subscribers. +- [I4YPJQ](https://gitee.com/opengauss/openGauss-server/issues/I4YPJQ?from=project-issue): Fixed the issue of failing to insert varchar constants into MOTs using JDBC. +- [I4PF6G](https://gitee.com/opengauss/openGauss-server/issues/I4PF6G?from=project-issue): Fixed the issue of TPC-C execution failure during foreign key lock enhancement and gray upgrade from 2.0.0 to 2.2.0 (not committed).
+- [I4WPD1](https://gitee.com/opengauss/openGauss-server/issues/I4WPD1?from=project-issue): Fixed the issue of failing to execute simplified installation because the **openGauss-2.1.0-CentOS-64bit.tar.bz2** file is missing in the decompressed installation package.
+- [I4L268](https://gitee.com/opengauss/openGauss-server/issues/I4L268?from=project-issue): Fixed the issue of incorrect system catalog **pg_partition** after the partitioned table is truncated multiple times and then the **vacuum freeze pg_partition** command is executed.
+- [I3HZJN](https://gitee.com/opengauss/openGauss-server/issues/I3HZJN?from=project-issue): Fixed the issue of incorrect date format when the **copy** command is executed.
+- [I4HUXD](https://gitee.com/opengauss/openGauss-server/issues/I4HUXD?from=project-issue): Fixed the issue of failing to query the JSONB type.
+- [I4QDN9](https://gitee.com/opengauss/openGauss-server/issues/I4QDN9?from=project-issue): Fixed the issue of returning a value for **select 1.79E +308\*2,cume_dist() over(order by 1.0E128\*1.2)** out of range.
+- [I4PAVO](https://gitee.com/opengauss/openGauss-server/issues/I4PAVO?from=project-issue): Fixed the issue of failing to identify the **start with connect by record** subquery.
+- [I4UY9A](https://gitee.com/opengauss/openGauss-server/issues/I4UY9A?from=project-issue): Fixed the issue of failing to create the default partition during list partitioning.
+- [I4W3UB](https://gitee.com/opengauss/openGauss-server/issues/I4W3UB?from=project-issue): Fixed the issue of failing to obtain the view definition when the view is created using a user-defined type and the user-defined type is renamed.
+- [I4WRMX](https://gitee.com/opengauss/openGauss-server/issues/I4WRMX?from=project-issue): Fixed the issue of failing to clear data in the **statement_history** table. When the database restarts and the **enable_stmt_track** parameter is disabled, no record should be found in the **statement_history** table.
+- [I4WOBH](https://gitee.com/opengauss/openGauss-server/issues/I4WOBH?from=project-issue): Fixed the issue of failing to restart the database by setting GUC parameter **pagewriter_sleep** from **360000** to **2000**. -- It prompts **which doesn't support recovery_target_lsn** when **gs_probackup** restores the database - -- **Statement_history** table cannot be cleaned up - -- Abnormal database downtime caused by schema cascade delete operation - -- **\d** in gsql cannot query the field information of the table or view corresponding to the synonym - -- The **lengthb** function does not support large object fields such as blob - -- After enabling sha256 authentication, the original md5 encrypted users can still successfully login through md5 authentication - -- The output of **raise** inside nested stored procedures in MogDB is too detailed - -### MogDB 2.1.1 Modified Defects - -MogDB 2.1.1 is the patch version of MogDB 2.1.0, released on 2022.03.22. Based on MogDB 2.1.0, the following fixes are made: - -- Fixed the defect of coredump caused by parameter overflow in **pg_encoding_to_char()** function - -- Fixed the defect of coredump generated when **connect by** statement is used as query clause - -- Fixed the bug that the order of query data in the connect by statement order by level is inconsistent on the x86 platform +
## 4. Compatibility diff --git a/product/en/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md b/product/en/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md index ba2053c3..1453e3a4 100644 --- a/product/en/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md +++ b/product/en/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md @@ -88,7 +88,7 @@ Errors that occur when data is imported are divided into data format errors and | nodeid | integer | ID of the node where an error is reported | | begintime | timestamp with time zone | Time when a data format error was reported | | filename | character varying | Name of the source data file where a data format error occurs | - | rownum | bigint | Number of the row where a data format error occurs in a source data file | + | rownum | numeric | Number of the row where a data format error occurs in a source data file | | rawrecord | text | Raw record of a data format error in the source data file | | detail | text | Error details | diff --git a/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md b/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md index a19bf3de..f91625bf 100644 --- a/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md +++ b/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md @@ -166,8 +166,6 @@ func_addgroup (Add one group) ### Partitioned Table Design -- The partitioned tables supported by MogDB database are range partitioned tables. - - The number of partitioned tables is not recommended to exceed 1000. 
- The primary key or unique index must contain the partition key. diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md deleted file mode 100644 index d595b4e6..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Constraint Design -summary: Constraint Design -author: Guo Huan -date: 2021-10-14 ---- - -# Constraint Design - -## DEFAULT and NULL Constraints - -- [Proposal] If all the column values can be obtained from services, you are not advised to use the **DEFAULT** constraint. Otherwise, unexpected results will be generated during data loading. -- [Proposal] Add **NOT NULL** constraints to columns that never have NULL values. The optimizer automatically optimizes the columns in certain scenarios. -- [Proposal] Explicitly name all constraints excluding **NOT NULL** and **DEFAULT**. - -## Partial Cluster Keys - -A partial cluster key (PCK) is a local clustering technology used for column-store tables. After creating a PCK, you can quickly filter and scan fact tables using min or max sparse indexes in MogDB. Comply with the following rules to create a PCK: - -- [Notice] Only one PCK can be created in a table. A PCK can contain multiple columns, preferably no more than two columns. -- [Proposal] Create a PCK on simple expression filter conditions in a query. Such filter conditions are usually in the form of **col op const**, where **col** specifies a column name, **op** specifies an operator (such as =, >, >=, <=, and <), and **const** specifies a constant. -- [Proposal] If the preceding conditions are met, create a PCK on the column having the most distinct values. 
- -## Unique Constraints - -- [Notice] Unique constraints can be used in row-store tables and column-store tables. -- [Proposal] The constraint name should indicate that it is a unique constraint, for example, **UNIIncluded columns**. - -## Primary Key Constraints - -- [Notice] Primary key constraints can be used in row-store tables and column-store tables. -- [Proposal] The constraint name should indicate that it is a primary key constraint, for example, **PKIncluded columns**. - -## Check Constraints - -- [Notice] Check constraints can be used in row-store tables but not in column-store tables. -- [Proposal] The constraint name should indicate that it is a check constraint, for example, **CKIncluded columns**. diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md deleted file mode 100644 index c72ba82a..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Database and Schema Design -summary: Database and Schema Design -author: Guo Huan -date: 2021-10-14 ---- - -# Database and Schema Design - -In MogDB, services can be isolated by databases and schemas. Databases share little resources and cannot directly access each other. Connections to and permissions on them are also isolated. Schemas share more resources than databases do. User permissions on schemas and subordinate objects can be controlled using the **GRANT** and **REVOKE** syntax. - -- You are advised to use schemas to isolate services for convenience and resource sharing. -- It is recommended that system administrators create schemas and databases and then assign required permissions to users. 
- -## Database Design - -- [Rule] Create databases as required by your service. Do not use the default **postgres** database of a database instance. -- [Proposal] Create a maximum of three user-defined databases in a database instance. -- [Proposal] To make your database compatible with most characters, you are advised to use the UTF-8 encoding when creating a database. -- [Notice] When you create a database, exercise caution when you set **ENCODING** and **DBCOMPATIBILITY** configuration items. MogDB supports the A, B and PG compatibility modes, which are compatible with the Oracle syntax, MySQL syntax and PostgreSQL syntax, respectively. The syntax behavior varies according to the compatibility mode. By default, the A compatibility mode is used. -- [Notice] By default, a database owner has all permissions for all objects in the database, including the deletion permission. Exercise caution when deleting a permission. - -## Schema Design - -- [Notice] To let a user access an object in a schema, assign the usage permission and the permissions for the object to the user, unless the user has the **sysadmin** permission or is the schema owner. -- [Notice] To let a user create an object in the schema, grant the create permission for the schema to the user. -- [Notice] By default, a schema owner has all permissions for all objects in the schema, including the deletion permission. Exercise caution when deleting a permission. 
diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md deleted file mode 100644 index 1759925c..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Field Design -summary: Field Design -author: Guo Huan -date: 2021-10-14 ---- - -# Field Design - -## Selecting a Data Type - -To improve query efficiency, comply with the following rules when designing columns: - -- [Proposal] Use the most efficient data types allowed. - - If all of the following number types provide the required service precision, they are recommended in descending order of priority: integer, floating point, and numeric. - -- [Proposal] In tables that are logically related, columns having the same meaning should use the same data type. - -- [Proposal] For string data, you are advised to use variable-length strings and specify the maximum length. To avoid truncation, ensure that the specified maximum length is greater than the maximum number of characters to be stored. You are not advised to use CHAR(n), BPCHAR(n), NCHAR(n), or CHARACTER(n), unless you know that the string length is fixed. - - For details about string types, see below. - -## Common String Types - -Every column requires a data type suitable for its data characteristics. The following table lists common string types in MogDB. - -**Table 1** Common string types - -| **Name** | **Description** | **Max. Storage Capacity** | -| :------------------- | :----------------------------------------------------------- | :------------------------ | -| CHAR(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. 
| 10 MB | -| CHARACTER(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| NCHAR(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| BPCHAR(n) | Fixed-length string, where *n* indicates the stored bytes. If the length of an input string is smaller than *n*, the string is automatically padded to *n* bytes using NULL characters. | 10 MB | -| VARCHAR(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. | 10 MB | -| CHARACTER VARYING(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. This data type and VARCHAR(n) are different representations of the same data type. | 10 MB | -| VARCHAR2(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. This data type is added to be compatible with the Oracle database, and its behavior is the same as that of VARCHAR(n). | 10 MB | -| NVARCHAR2(n) | Variable-length string, where *n* indicates the maximum number of bytes that can be stored. | 10 MB | -| TEXT | Variable-length string. Its maximum length is 1 GB minus 8203 bytes. 
| 1 GB minus 8203 bytes | diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md deleted file mode 100644 index c8839648..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Table Design -summary: Table Design -author: Guo Huan -date: 2021-10-14 ---- - -# Table Design - -Comply with the following principles to properly design a table: - -- [Notice] Reduce the amount of data to be scanned. You can use the pruning mechanism of a partitioned table. -- [Notice] Minimize random I/Os. By clustering or local clustering, you can sequentially store hot data, converting random I/O to sequential I/O to reduce the cost of I/O scanning. - -## Selecting a Storage Mode - -[Proposal] Selecting a storage model is the first step in defining a table. The storage model mainly depends on the customer's service type. For details, see Table 1. - -**Table 1** Table storage modes and scenarios - -| Storage Type | Application Scenario | -| :----------- | :----------------------------------------------------------- | -| Row store | - Point queries (simple index-based queries that only return a few records).
- Scenarios requiring frequent addition, deletion, and modification. | -| Column store | - Statistical analysis queries (requiring a large number of association and grouping operations).
- Ad hoc queries (using uncertain query conditions and unable to utilize indexes to scan row-store tables). | - -## Selecting a Partitioning Mode - -If a table contains a large amount of data, partition the table based on the following rules: - -- [Proposal] Create partitions on columns that indicate certain ranges, such as dates and regions. -- [Proposal] A partition name should show the data characteristics of a partition. For example, its format can be **Keyword+Range** characteristics. -- [Proposal] Set the upper limit of a partition to **MAXVALUE** to prevent data overflow. - -The example of a partitioned table definition is as follows: - -```sql -CREATE TABLE staffS_p1 -( - staff_ID NUMBER(6) not null, - FIRST_NAME VARCHAR2(20), - LAST_NAME VARCHAR2(25), - EMAIL VARCHAR2(25), - PHONE_NUMBER VARCHAR2(20), - HIRE_DATE DATE, - employment_ID VARCHAR2(10), - SALARY NUMBER(8,2), - COMMISSION_PCT NUMBER(4,2), - MANAGER_ID NUMBER(6), - section_ID NUMBER(4) -) -PARTITION BY RANGE (HIRE_DATE) -( - PARTITION HIRE_19950501 VALUES LESS THAN ('1995-05-01 00:00:00'), - PARTITION HIRE_19950502 VALUES LESS THAN ('1995-05-02 00:00:00'), - PARTITION HIRE_maxvalue VALUES LESS THAN (MAXVALUE) -); -``` diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md deleted file mode 100644 index ea5629a4..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: View and Joined Table Design -summary: View and Joined Table Design -author: Guo Huan -date: 2021-10-14 ---- - -# View and Joined Table Design - -## View Design - -- [Proposal] Do not nest views unless they have strong dependency on each other. 
-- [Proposal] Try to avoid collation operations in a view definition. - -## Joined Table Design - -- [Proposal] Minimize joined columns across tables. -- [Proposal] Use the same data type for joined columns. -- [Proposal] The names of joined columns should indicate their relationship. For example, they can use the same name. diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md deleted file mode 100644 index 1f8e5b2e..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Database Object Naming Conventions -summary: Database Object Naming Conventions -author: Guo Huan -date: 2021-10-14 ---- - -# Database Object Naming Conventions - -The name of a database object must meet the following requirements: The name of a non-time series table ranges from 1 to 63 characters and that of a time series table ranges from 1 to 53 characters. The name must start with a letter or underscore (_), and can contain letters, digits, underscores (_), dollar signs ($), and number signs (#). - -- [Proposal] Do not use reserved or non-reserved keywords to name database objects. - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **NOTE:** You can use the select * from pg_get_keywords() query openGauss keyword or view the keyword in [Keywords](2-keywords). - -- [Proposal] Do not use a string enclosed in double quotation marks ("") to define the database object name, unless you need to specify its capitalization. Case sensitivity of database object names makes problem location difficult. - -- [Proposal] Use the same naming format for database objects. 
- - - In a system undergoing incremental development or service migration, you are advised to comply with its historical naming conventions. - - You are advised to use multiple words separated with underscores (_). - - You are advised to use intelligible names and common acronyms or abbreviations for database objects. Acronyms or abbreviations that are generally understood are recommended. For example, you can use English words or Chinese pinyin indicating actual business terms. The naming format should be consistent within a database instance. - - A variable name must be descriptive and meaningful. It must have a prefix indicating its type. - -- [Proposal] The name of a table object should indicate its main characteristics, for example, whether it is an ordinary, temporary, or unlogged table. - - - An ordinary table name should indicate the business relevant to a dataset. - - Temporary tables are named in the format of **tmp_Suffix**. - - Unlogged tables are named in the format of **ul_Suffix**. - - Foreign tables are named in the format of **f_Suffix**. - - Do not create database objects whose names start with **redis_**. diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md deleted file mode 100644 index 9b98deb8..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Overview of Development and Design Proposal -summary: Overview of Development and Design Proposal -author: Guo Huan -date: 2021-10-14 ---- - -# Overview of Development and Design Proposal - -This section describes the design specifications for database modeling and application development. 
Modeling based on these specifications can better fit the distributed processing architecture of MogDB and output more efficient service SQL code. - -The meaning of "Proposal" and "Notice" in this section is as follows: - -- **Proposal**: Design rules. Services complying with the rules can run efficiently, and those violating the rules may have low performance or logic errors. -- **Notice**: Details requiring attention during service development. This term identifies SQL behavior that complies with SQL standards but users may have misconceptions about, and default behavior that users may be unaware of in a program. diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md deleted file mode 100644 index 5935dd14..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: SQL Compilation -summary: SQL Compilation -author: Guo Huan -date: 2021-10-14 ---- - -# SQL Compilation - -## DDL - -- [Proposal] In MogDB, you are advised to execute DDL operations, such as creating table or making comments, separately from batch processing jobs to avoid performance deterioration caused by many concurrent transactions. -- [Proposal] Execute data truncation after unlogged tables are used because MogDB cannot ensure the security of unlogged tables in abnormal scenarios. -- [Proposal] Suggestions on the storage mode of temporary and unlogged tables are the same as those on base tables. Create temporary tables in the same storage mode as the base tables to avoid high computing costs caused by hybrid row and column correlation. -- [Proposal] The total length of an index column cannot exceed 50 bytes. Otherwise, the index size will increase greatly, resulting in large storage cost and low index performance. 
-- [Proposal] Do not delete objects using **DROP…CASCADE**, unless the dependency between objects is specified. Otherwise, the objects may be deleted by mistake. - -## Data Loading and Uninstalling - -- [Proposal] Provide the inserted column list in the insert statement. Example: - - ```sql - INSERT INTO task(name,id,comment) VALUES ('task1','100','100th task'); - ``` - -- [Proposal] After data is imported to the database in batches or the data increment reaches the threshold, you are advised to analyze tables to prevent the execution plan from being degraded due to inaccurate statistics. - -- [Proposal] To clear all data in a table, you are advised to use **TRUNCATE TABLE** instead of **DELETE TABLE**. **DELETE TABLE** is not efficient and cannot release disk space occupied by the deleted data. - -## Type Conversion - -- [Proposal] Convert data types explicitly. If you perform implicit conversion, the result may differ from expected. -- [Proposal] During data query, explicitly specify the data type for constants, and do not attempt to perform any implicit data type conversion. -- [Notice] If **sql_compatibility** is set to **A**, null strings will be automatically converted to **NULL** during data import. If null strings need to be reserved, set **sql_compatibility** to **C**. - -## Query Operation - -- [Proposal] Do not return a large number of result sets to a client except the ETL program. If a large result set is returned, consider modifying your service design. - -- [Proposal] Perform DDL and DML operations encapsulated in transactions. Operations like table truncation, update, deletion, and dropping, cannot be rolled back once committed. You are advised to encapsulate such operations in transactions so that you can roll back the operations if necessary. - -- [Proposal] During query compilation, you are advised to list all columns to be queried and avoid using **SELECT \***. 
Doing so reduces output lines, improves query performance, and avoids the impact of adding or deleting columns on front-end service compatibility. - -- [Proposal] During table object access, add the schema prefix to the table object to avoid accessing an unexpected table due to schema switchover. - -- [Proposal] The cost of joining more than three tables or views, especially full joins, is difficult to be estimated. You are advised to use the **WITH TABLE AS** statement to create interim tables to improve the readability of SQL statements. - -- [Proposal] Avoid using Cartesian products or full joins. Cartesian products and full joins will result in a sharp expansion of result sets and poor performance. - -- [Notice] Only **IS NULL** and **IS NOT NULL** can be used to determine NULL value comparison results. If any other method is used, **NULL** is returned. For example, **NULL** instead of expected Boolean values is returned for **NULL<>NULL**, **NULL=NULL**, and **NULL<>1**. - -- [Notice] Do not use **count(col)** instead of **count(\*)** to count the total number of records in a table. **count(\*)** counts the NULL value (actual rows) while **count(col)** does not. - -- [Notice] While executing **count(col)**, the number of NULL record rows is counted as 0. While executing **sum(col)**, **NULL** is returned if all records are NULL. If not all the records are NULL, the number of NULL record rows is counted as 0. - -- [Notice] To count multiple columns using **count()**, column names must be enclosed in parentheses. For example, count ((col1, col2, col3)). Note: When multiple columns are used to count the number of NULL record rows, a row is counted even if all the selected columns are NULL. The result is the same as that when **count(\*)** is executed. - -- [Notice] NULL records are not counted when **count(distinct col)** is used to calculate the number of non-NULL columns that are not repeated. 
- -- [Notice] If all statistical columns are NULL when **count(distinct (col1,col2,…))** is used to count the number of unique values in multiple columns, NULL records are also counted, and the records are considered the same. - -- [Proposal] Use the connection operator || to replace the **concat** function for string connection because the execution plan generated by the **concat** function cannot be pushed down to disks. As a result, the query performance severely deteriorates. - -- [Proposal] Use the following time-related macros to replace the **now** function and obtain the current time because the execution plan generated by the **now** function cannot be pushed down to disks. As a result, the query performance severely deteriorates. - - **Table 1** Time-related macros - - | **Macro Name** | **Description** | **Example** | - | :------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | - | CURRENT_DATE | Obtains the current date, excluding the hour, minute, and second details. | `mogdb=# select CURRENT_DATE; date ----- 2018-02-02 (1 row)` | - | CURRENT_TIME | Obtains the current time, excluding the year, month, and day. | `mogdb=# select CURRENT_TIME; timetz -------- 00:39:34.633938+08 (1 row)` | - | CURRENT_TIMESTAMP(n) | Obtains the current date and time, including year, month, day, hour, minute, and second.
NOTE:
**n** indicates the number of digits after the decimal point in the time string. | `mogdb=# select CURRENT_TIMESTAMP(6); timestamptz ----------- 2018-02-02 00:39:55.231689+08 (1 row)` | - -- [Proposal] Do not use scalar subquery statements. A scalar subquery appears in the output list of a SELECT statement. In the following example, the underlined part is a scalar subquery statement: - - ```sql - SELECT id, (SELECT COUNT(*) FROM films f WHERE f.did = s.id) FROM staffs_p1 s; - ``` - - Scalar subqueries often result in query performance deterioration. During application development, scalar subqueries need to be converted into equivalent table associations based on the service logic. - -- [Proposal] In **WHERE** clauses, the filter conditions should be collated. The condition that few records are selected for reading (the number of filtered records is small) is listed at the beginning. - -- [Proposal] Filter conditions in **WHERE** clauses should comply with unilateral rules, that is, to place the column name on one side of a comparison operator. In this way, the optimizer automatically performs pruning optimization in some scenarios. Filter conditions in a **WHERE** clause will be displayed in **col op expression** format, where **col** indicates a table column, **op** indicates a comparison operator, such as = and >, and **expression** indicates an expression that does not contain a column name. Example: - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data WHERE current_timestamp(6) - time < '1 days'::interval; - ``` - - The modification is as follows: - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data where time > current_timestamp(6) - '1 days'::interval; - ``` - -- [Proposal] Do not perform unnecessary collation operations. Collation requires a large amount of memory and CPU. If service logic permits, **ORDER BY** and **LIMIT** can be combined to reduce resource overheads. 
By default, MogDB perform collation by ASC & NULL LAST. - -- [Proposal] When the **ORDER BY** clause is used for collation, specify collation modes (ASC or DESC), and use NULL FIRST or NULL LAST for NULL record sorting. - -- [Proposal] Do not rely on only the **LIMIT** clause to return the result set displayed in a specific sequence. Combine **ORDER BY** and **LIMIT** clauses for some specific result sets and use **OFFSET** to skip specific results if necessary. - -- [Proposal] If the service logic is accurate, you are advised to use **UNION ALL** instead of **UNION**. - -- [Proposal] If a filter condition contains only an **OR** expression, convert the **OR** expression to **UNION ALL** to improve performance. SQL statements that use **OR** expressions cannot be optimized, resulting in slow execution. Example: - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) OR (cdp= 301 AND inline=302) OR (cdp= 302 ANDinline=301); - ``` - - Convert the statement to the following: - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) - union all - SELECT * FROM scdc.pub_menu - WHERE (cdp= 301 AND inline=302) - union all - SELECT * FROM tablename - WHERE (cdp= 302 AND inline=301) - ``` - -- [Proposal] If an **in(val1, val2, val3…)** expression contains a large number of columns, you are advised to replace it with the **in (values (va11), (val2),(val3)…)** statement. The optimizer will automatically convert the **IN** constraint into a non-correlated subquery to improve the query performance. - -- [Proposal] Replace **(not) in** with **(not) exist** when associated columns do not contain **NULL** values. For example, in the following query statement, if the **T1.C1** column does not contain any **NULL** value, add the **NOT NULL** constraint to the **T1.C1** column, and then rewrite the statements. 
- - ```sql - SELECT * FROM T1 WHERE T1.C1 NOT IN (SELECT T2.C2 FROM T2); - ``` - - Rewrite the statement as follows: - - ```sql - SELECT * FROM T1 WHERE NOT EXISTS (SELECT * FROM T1,T2 WHERE T1.C1=T2.C2); - ``` - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **NOTE:** - > - > - If the value of the T1.C1 column is not **NOT NULL**, the preceding rewriting cannot be performed. - > - If the **T1.C1** column is the output of a subquery, check whether the output is **NOT NULL** based on the service logic. - -- [Proposal] Use cursors instead of the **LIMIT OFFSET** syntax to perform pagination queries to avoid resource overheads caused by multiple executions. A cursor must be used in a transaction, and you must disable the cursor and commit the transaction once the query is finished. diff --git a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md b/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md deleted file mode 100644 index cd91b5d4..00000000 --- a/product/en/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: JDBC Configuration -summary: JDBC Configuration -author: Guo Huan -date: 2021-10-14 ---- - -# JDBC Configuration - -Currently, third-party tools related to MogDB are connected through JDBC. This section describes the precautions for configuring the tool. - -## Connection Parameters - -- [Notice] When a third-party tool connects to MogDB through JDBC, JDBC sends a connection request to MogDB. By default, the following configuration parameters are added. For details, see the implementation of the ConnectionFactoryImpl class in the JDBC code. 
- - ``` - params = { - { "user", user }, - { "database", database }, - { "client_encoding", "UTF8" }, - { "DateStyle", "ISO" }, - { "extra_float_digits", "3" }, - { "TimeZone", createPostgresTimeZone() }, - }; - ``` - - These parameters may cause the JDBC and **gsql** clients to display inconsistent data, for example, date data display mode, floating point precision representation, and timezone. - - If the result is not as expected, you are advised to explicitly set these parameters in the Java connection setting. - -- [Proposal] When connecting to the database through JDBC, ensure that the following three time zones are the same: - - - Time zone of the host where the JDBC client is located - - - Time zone of the host where the MogDB database instance is located. - - - Time zone used during MogDB database instance configuration. - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **NOTE:** For details about how to set the time zone, see "[Setting the Time Zone and Time](3-modifying-os-configuration#setting-the-time-zone-and-time)" in *Installation Guide*. - -## fetchsize - -[Notice] To use **fetchsize** in applications, disable **autocommit**. Enabling the **autocommit** switch makes the **fetchsize** configuration invalid. - -## autocommit - -[Proposal] You are advised to enable **autocommit** in the code for connecting to MogDB by the JDBC. If **autocommit** needs to be disabled to improve performance or for other purposes, applications need to ensure their transactions are committed. For example, explicitly commit translations after specifying service SQL statements. Particularly, ensure that all transactions are committed before the client exits. - -## Connection Releasing - -[Proposal] You are advised to use connection pools to limit the number of connections from applications. Do not connect to a database every time you run an SQL statement. 
- -[Proposal] After an application completes its tasks, disconnect its connection to MogDB to release occupied resources. You are advised to set the session timeout interval in the jobs. - -[Proposal] Reset the session environment before releasing connections to the JDBC connection tool. Otherwise, historical session information may cause object conflicts. - -- If GUC parameters are set in the connection, run **SET SESSION AUTHORIZATION DEFAULT;RESET ALL;** to clear the connection status before you return the connection to the connection pool. -- If a temporary table is used, delete the temporary table before you return the connection to the connection pool. - -## CopyManager - -[Proposal] In the scenario where the ETL tool is not used and real-time data import is required, it is recommended that you use the **CopyManager** interface driven by the MogDB JDBC to import data in batches during application development. diff --git a/product/en/docs-mogdb/v3.0/installation-guide/manual-installation.md b/product/en/docs-mogdb/v3.0/installation-guide/manual-installation.md index a1fd9827..871861f4 100644 --- a/product/en/docs-mogdb/v3.0/installation-guide/manual-installation.md +++ b/product/en/docs-mogdb/v3.0/installation-guide/manual-installation.md @@ -31,7 +31,8 @@ groupadd dbgrp -g 2000 useradd omm -g 2000 -u 2000 echo "Enmo@123" | passwd --stdin omm mkdir -p /opt/mogdb/software -chown -R omm:dbgrp /opt/software/mogdb +mkdir -p /opt/mogdb/data +chown -R omm:dbgrp /opt/mogdb ``` d. Upload and decompress the binary file. 
diff --git a/product/en/docs-mogdb/v3.0/toc.md b/product/en/docs-mogdb/v3.0/toc.md index cf0d4f6c..ad02b8f5 100644 --- a/product/en/docs-mogdb/v3.0/toc.md +++ b/product/en/docs-mogdb/v3.0/toc.md @@ -191,18 +191,6 @@ + [WDR Snapshot Schema](/performance-tuning/wdr-snapshot-schema.md) + [TPCC Performance Tuning Guide](/performance-tuning/TPCC-performance-tuning-guide.md) + Developer Guide - + Development and Design Proposal - + [Overview](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [Database Object Naming Conventions](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + Database Object Design - + [Database and Schema Design](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [Table Design](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [Field Design](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [Constraint Design](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [View and Joined Table Design](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + Tool Interconnection - + [JDBC Configuration](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL Compilation](/developer-guide/development-and-design-proposal/sql-compilation.md) + Application Development Guide + [Development Specifications](/developer-guide/dev/1-development-specifications.md) + Development Based on JDBC diff --git a/product/en/docs-mogdb/v3.0/toc_dev.md b/product/en/docs-mogdb/v3.0/toc_dev.md index 60e398ad..38a3d0f5 100644 --- a/product/en/docs-mogdb/v3.0/toc_dev.md +++ b/product/en/docs-mogdb/v3.0/toc_dev.md @@ -4,18 +4,6 @@ ## Developer Guide -+ Development and Design Proposal - + 
[Overview](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [Database Object Naming Conventions](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + Database Object Design - + [Database and Schema Design](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [Table Design](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [Field Design](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [Constraint Design](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [View and Joined Table Design](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + Tool Interconnection - + [JDBC Configuration](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL Compilation](/developer-guide/development-and-design-proposal/sql-compilation.md) + Application Development Guide + [Development Specifications](/developer-guide/dev/1-development-specifications.md) + Development Based on JDBC diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md b/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md index ac9277d2..1bb81b51 100644 --- a/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md +++ b/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md @@ -20,7 +20,7 @@ date: 2021-04-27 尽管ISO针对SQL已经发布SQL-92、SQL:1999、SQL:2006等标准,但由于不同数据库自身的特性,使得同样功能在各自产品的实现上不尽相同,这也使得相关的语法规则各有千秋。因此,在制定具体开发规范的时候,需要针对不同数据库来编写相应的规范。 -本规范强调实用性、可操作性,根据开发人员在编码过程中常见的问题和易犯的错误,对代码编写的各方面均进行了详细明确的规范和约束。主要包括下列内容: +本规范强调实用性、可操作性,根据开发人员在编码过程中常见的问题和易犯的错误,对代码编写的各方面均进行了详细明确的规范和约束。主要包括下列内容: - 命名规范 @@ -34,7 +34,7 @@ date: 2021-04-27 - 常用函数 
-除此之外,对规范的每条细则均给出具体的范例。 +除此之外,对规范的每条细则均给出具体的范例。 ### 适用范围 @@ -58,11 +58,11 @@ date: 2021-04-27 - 禁止使用保留字,保留关键字参考官方文档。 -- table能包含的column数目,根据字段类型的不同,数目在 250 到 1600 之间 +- table能包含的column数目,根据字段类型的不同,数目在 250 到 1600 之间 ### 临时及备份对象命名 -- 临时或备份的数据库对象名,如table,建议添加日期, 如dba.trade_record_2020_12_08 (其中dba 为DBA专用schema,trade_record为表名,2020_12_08为备份日期)。 +- 临时或备份的数据库对象名,如table,建议添加日期,如dba.trade_record_2020_12_08 (其中dba 为DBA专用schema,trade_record为表名,2020_12_08为备份日期)。 ### 表空间命名 @@ -100,15 +100,15 @@ SELECT 1 ### 变量命名 -- 命名应该使用英文单词,避免使用拼音,特别不应该使用拼音简写。命名不允许使用中文或者特殊字符。 +- 命名应该使用英文单词,避免使用拼音,特别不应该使用拼音简写。命名不允许使用中文或者特殊字符。 -- 如果不涉及复杂运算,一律用number定义计数等简单应用。 +- 如果不涉及复杂运算,一律用number定义计数等简单应用。 ### 分区表命名 - 分区表的表名遵循普通表的正常命名规则。 -- 按时间范围分区(每月一个分区),分区名字为PART_YYYYMM。 +- 按时间范围分区(每月一个分区),分区名字为PART_YYYYMM。 举例: PART_201901、PART_201902 @@ -167,13 +167,11 @@ func_addgroup(增加一个群组) ### partition table设计 -- MogDB/openGauss数据库支持的分区表为范围分区表。 - - 分区表的个数不建议超过1000个。 - 主键或唯一索引必须要包含分区键。 -- 对于数据量比较大的表,根据表数据的属性进行分区,以得到较好的性能。 +- 对于数据量比较大的表,根据表数据的属性进行分区,以得到较好的性能。 - 普通表若要转成分区表,需要新建分区表,然后把普通表中的数据导入到新建的分区表中。因此在初始设计表时,请根据业务提前规划是否使用分区表。 @@ -364,13 +362,13 @@ DROP TABLESPACE - 建议可以采用数值类型的场合,则避免采用字符类型。 -- 建议可以采用varchar(N) 就避免采用char(N), 可以采用varchar(N) 就避免采用text,varchar。 +- 建议可以采用varchar(N) 就避免采用char(N),可以采用varchar(N) 就避免采用text,varchar。 - 只允许用char(N)、varchar(N)及text字符类型。 - MogDB/openGauss新建数据库默认兼容oracle,not null 约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 -- 建议使用timestamp with time zone(timestamptz),而不用timestamp without time zone。 +- 建议使用timestamp with time zone(timestamptz),而不用timestamp without time zone。 - 建议使用NUMERIC(precision, scale)来存储货币金额和其它要求精确计算的数值, 而不建议使用real, double precision。 @@ -388,11 +386,11 @@ DROP TABLESPACE - 每个table必须包含主键。 -- 建议不要用有业务含义的名称作为主键,比如身份证或者国家名称,尽管其是unique的。 +- 建议不要用有业务含义的名称作为主键,比如身份证或者国家名称,尽管其是unique的。 - 建议主键的一步到位的写法:id serial primary key 或id bigserial primary key。 -- 建议内容系统中size较大的table主键的等效写法如下,便于后续维护。 +- 建议内容系统中size较大的table主键的等效写法如下,便于后续维护。 ```sql create table test(id serial not null ); @@ -411,7 
+409,7 @@ create unique index CONCURRENTLY ON test (id); #### 非空列 -- 所有非空列须在建表之初明确标识“NOT NULL”,上线之后,不再变更。同时需注意 NULL 与空字符‘’的查询结果差别:null 值会转换为 NULL, 而空值无字符显示。 +- 所有非空列须在建表之初明确标识“NOT NULL”,上线之后,不再变更。同时需注意 NULL 与空字符‘’的查询结果差别:null 值会转换为 NULL,而空值无字符显示。 #### 检查约束 @@ -420,20 +418,20 @@ create unique index CONCURRENTLY ON test (id); ### index设计 - MogDB/openGauss 提供的index类型: 行存表支持的索引类型:btree(行存表缺省值)、gin、gist。列存表支持的索引类型:Psort(列存表缺省值)、btree、gin。 -- 建议create 或 drop index 时,加 CONCURRENTLY参数,这是个好习惯,达到与写入数据并发的效果,列存表、分区表和临时表不支持CONCURRENTLY方式创建索引。 -- 建议对于频繁update, delete的包含于index 定义中的column的table, 用create index CONCURRENTLY , drop index CONCURRENTLY的方式进行维护其对应index。 +- 建议create 或 drop index 时,加 CONCURRENTLY参数,这是个好习惯,达到与写入数据并发的效果,列存表、分区表和临时表不支持CONCURRENTLY方式创建索引。 +- 建议对于频繁update、delete的包含于index 定义中的column的table,用create index CONCURRENTLY , drop index CONCURRENTLY的方式进行维护其对应index。 - 建议用unique index 代替unique constraints便于后续维护。 - 建议对where 中带多个字段and条件的高频 query,参考数据分布情况,建多个字段的联合index。 - 每个表的index数量不能超过5个。 - 复合索引的建立需要进行仔细分析: - - 正确选择复合索引中的第一个字段,一般是选择性较好的且在where子句中常用的字段上; - - 复合索引的几个字段是否经常同时以AND方式出现在Where子句中?单字段查询是否极少甚至没有?如果是,则可以建立复合索引;否则考虑单字段索引; - - 如果复合索引中包含的字段经常单独出现在Where子句中,则分解为多个单字段索引; - - 如果既有单字段索引,又有以这个字段为首列的复合索引,一般可考虑删除单字段索引; + - 正确选择复合索引中的第一个字段,一般是选择性较好的且在where子句中常用的字段上; + - 复合索引的几个字段是否经常同时以AND方式出现在Where子句中?单字段查询是否极少甚至没有?如果是,则可以建立复合索引;否则考虑单字段索引; + - 如果复合索引中包含的字段经常单独出现在Where子句中,则分解为多个单字段索引; + - 如果既有单字段索引,又有以这个字段为首列的复合索引,一般可考虑删除单字段索引; - 复合索引第一个字段一般不使用时间字段,因为时间字段多用于范围扫描,而前面的字段使用范围扫描后,后续字段无法用于索引过滤。 - 复合索引字段个数不能超过4个。 -- 频繁DML(写次数明显超过读次数)的表,不要建立太多的索引。 +- 频繁DML(写次数明显超过读次数)的表,不要建立太多的索引。 - 无用的索引以及重复索引应删除,避免对执行计划及数据库性能造成负面影响。 @@ -441,17 +439,17 @@ create unique index CONCURRENTLY ON test (id); - 尽量使用简单视图,尽可能少使用复杂视图。 - 简单视图定义:数据来自单个表,且无分组(DISTINCT/GROUP BY)、无函数。 + 简单视图定义:数据来自单个表,且无分组(DISTINCT/GROUP BY)、无函数。 - 复杂视图定义:数据来自多个表,或有分组,有函数,表的个数不能超过3个。 + 复杂视图定义:数据来自多个表,或有分组,有函数,表的个数不能超过3个。 - 尽量不要使用嵌套视图,如果必须使用,不能超过2层嵌套。 ### function设计 -- 
函数必须检索数据库表记录或数据库其他对象,甚至修改(执行Insert、Delete、Update、Drop、Create等操作)数据库信息。 +- 函数必须检索数据库表记录或数据库其他对象,甚至修改(执行Insert、Delete、Update、Drop、Create等操作)数据库信息。 -- 如果某项功能不需要和数据库打交道,则不得通过数据库函数的方式实现。 +- 如果某项功能不需要和数据库打交道,则不得通过数据库函数的方式实现。 - 在函数中避免采用DML或DDL语句。 @@ -589,7 +587,7 @@ f ### 确保使用到所有变量和参数 -- 声明变量也会产生一定的系统开销,并会显得代码不够严谨,在编译时未使用的变量会有告警,需修改以确保没有任何告警。 +- 声明变量也会产生一定的系统开销,并会显得代码不够严谨,在编译时未使用的变量会有告警,需修改以确保没有任何告警。 ## Query操作 @@ -615,11 +613,11 @@ alter table t alter column col set not null; ### DML操作 -- update 时做 <> 判断,比如update table_a set column_b = c where column_b <> c,禁止出现where 1=1 +- update 时做 <> 判断,比如update table_a set column_b = c where column_b <> c,禁止出现where 1=1 - 单条DML语句操作数据量不超过10万 -- 建议清空表时,使用truncate,不建议使用delete +- 建议清空表时,使用truncate,不建议使用delete ### DQL操作 @@ -641,17 +639,17 @@ alter table t alter column col set not null; ### 事务操作 -- 事务中的sql逻辑尽可能的简单,让每个事务的粒度尽可能小,尽量lock少的资源,避免lock 、deadlock的产生,事务执行完及时提交 +- 事务中的sql逻辑尽可能的简单,让每个事务的粒度尽可能小,尽量lock少的资源,避免lock、deadlock的产生,事务执行完及时提交 -- 执行CRAETE、DROP、ALTER等DDL操作, 尤其多条,不要显式的开transaction, 因为加lock的mode非常高,极易产生deadlock +- 执行CRAETE、DROP、ALTER等DDL操作,尤其多条,不要显式的开transaction,因为加lock的mode非常高,极易产生deadlock -- state 为 idle in transaction 的连接,如果出现在Master, 会无谓的lock住相应的资源, 可导致后续产生lock,甚至deadlock; 出现在Slave,可导致卡住主从同步 +- state 为 idle in transaction 的连接,如果出现在Master,会无谓的lock住相应的资源,可导致后续产生lock,甚至deadlock; 出现在Slave,可导致卡住主从同步 ### 其他 -- 建议运行在SSD上的实例, random_page_cost (默认值为4) 设置为1.0~2.0之间, 使查询规划器更倾向于使用索引扫描 +- 建议运行在SSD上的实例,random_page_cost (默认值为4) 设置为1.0~2.0之间,使查询规划器更倾向于使用索引扫描 -- 建议在需要使用explain analyze 查看实际真正执行计划与时间时,如果是写入 query,强烈建议先开启事务, 然后回滚。 +- 建议在需要使用explain analyze查看实际真正执行计划与时间时,如果是写入query,强烈建议先开启事务,然后回滚。 - 对于频繁更新,膨胀率较高的表,应找窗口期执行表重组,降低高水位 @@ -724,7 +722,7 @@ create database dbnam DBCOMPATIBILITY='PG' encoding=’utf8’; #### json类型 -MogDB/openGauss只允许使用json 类型。 +MogDB/openGauss只允许使用json类型。 | 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | | ----- | ---------- | --------- | -------- | ---- | diff --git 
a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md deleted file mode 100644 index 98743159..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: 约束设计 -summary: 约束设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 约束设计 - -## DEFAULT和NULL约束 - -- 【建议】如果能够从业务层面补全字段值,那么,就不建议使用DEFAULT约束,避免数据加载时产生不符合预期的结果。 -- 【建议】给明确不存在NULL值的字段加上NOT NULL约束,优化器会在特定场景下对其进行自动优化。 -- 【建议】给可以显式命名的约束显式命名。除了NOT NULL和DEFAULT约束外,其他约束都可以显式命名。 - -## 局部聚簇 - -Partial Cluster Key(局部聚簇,简称PCK)是列存表的一种局部聚簇技术,在MogDB中,使用PCK可以通过min/max稀疏索引实现事实表快速过滤扫描。PCK的选取遵循以下原则: - -- 【关注】一张表上只能建立一个PCK,一个PCK可以包含多列,但是一般不建议超过2列。 -- 【建议】在查询中的简单表达式过滤条件上创建PCK。这种过滤条件一般形如col op const,其中col为列名,op为操作符 =、>、>=、<=、<,const为常量值。 -- 【建议】在满足上面条件的前提下,选择distinct值比较多的列上建PCK。 - -## 唯一约束 - -- 【关注】行存表、列存表均支持唯一约束。 -- 【建议】从命名上明确标识唯一约束,例如,命名为“UNI+构成字段”。 - -## 主键约束 - -- 【关注】行存表、列存表均支持主键约束。 -- 【建议】从命名上明确标识主键约束,例如,将主键约束命名为 “PK+字段名”。 - -## 检查约束 - -- 【关注】行存表支持检查约束,而列存表不支持。 -- 【建议】从命名上明确标识检查约束,例如,将检查约束命名为 “CK+字段名”。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md deleted file mode 100644 index 1493db0a..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Database和Schema设计 -summary: Database和Schema设计 -author: Guo Huan -date: 2021-10-14 ---- - -# Database和Schema设计 - 
-MogDB中可以使用Database和Schema实现业务的隔离,区别在于Database的隔离更加彻底,各个Database之间共享资源极少,可实现连接隔离、权限隔离等,Database之间无法直接互访。Schema隔离的方式共用资源较多,可以通过grant与revoke语法便捷地控制不同用户对各Schema及其下属对象的权限。 - -- 从便捷性和资源共享效率上考虑,推荐使用Schema进行业务隔离。 -- 建议系统管理员创建Schema和Database,再赋予相关用户对应的权限。 - -## Database设计建议 - -- 【规则】在实际业务中,根据需要创建新的Database,不建议直接使用数据库实例默认的postgres数据库。 -- 【建议】一个数据库实例内,用户自定义的Database数量建议不超过3个。 -- 【建议】为了适应全球化的需求,使数据库编码能够存储与表示绝大多数的字符,建议创建Database的时候使用UTF-8编码。 -- 【关注】创建Database时,需要重点关注字符集编码(ENCODING)和兼容性(DBCOMPATIBILITY)两个配置项。MogDB支持A、B和PG三种兼容模式,分别表示兼容Oracle语法、MySQL语法和PostgreSQL语法,不同兼容模式下的语法行为存在一定差异,默认为A兼容模式。 -- 【关注】Database的owner默认拥有该Database下所有对象的所有权限,包括删除权限。删除权限影响较大,请谨慎使用。 - -## Schema设计建议 - -- 【关注】如果该用户不具有sysadmin权限或者不是该Schema的owner,要访问Schema下的对象,需要同时给用户赋予Schema的usage权限和对象的相应权限。 -- 【关注】如果要在Schema下创建对象,需要授予操作用户该Schema的create权限。 -- 【关注】Schema的owner默认拥有该Schema下对象的所有权限,包括删除权限。删除权限影响较大,请谨慎使用。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md deleted file mode 100644 index 55b455b1..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/field-design.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: 字段设计 -summary: 字段设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 字段设计 - -## 选择数据类型 - -在字段设计时,基于查询效率的考虑,一般遵循以下原则: - -- 【建议】尽量使用高效数据类型。 - - 选择数值类型时,在满足业务精度的情况下,选择数据类型的优先级从高到低依次为整数、浮点数、NUMERIC。 - -- 【建议】当多个表存在逻辑关系时,表示同一含义的字段应该使用相同的数据类型。 - -- 【建议】对于字符串数据,建议使用变长字符串数据类型,并指定最大长度。请务必确保指定的最大长度大于需要存储的最大字符数,避免超出最大长度时出现字符截断现象。除非明确知道数据类型为固定长度字符串,否则,不建议使用CHAR(n)、BPCHAR(n)、NCHAR(n)、CHARACTER(n)。 - - 关于字符串类型的详细说明,请参见下文。 - -## 常用字符串类型介绍 - -在进行字段设计时,需要根据数据特征选择相应的数据类型。字符串类型在使用时比较容易混淆,下表列出了MogDB中常见的字符串类型: - -**表 1** 常用字符串类型 - -| **名称** | **描述** | **最大存储空间** | -| :------------------- | :----------------------------------------------------------- | :--------------- | -| CHAR(n) | 
定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| CHARACTER(n) | 定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| NCHAR(n) | 定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| BPCHAR(n) | 定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| VARCHAR(n) | 变长字符串,n描述了可以存储的最大字节长度。 | 10MB | -| CHARACTER VARYING(n) | 变长字符串,n描述了可以存储的最大字节长度;此数据类型和VARCHAR(n)是同一数据类型的不同表达形式。 | 10MB | -| VARCHAR2(n) | 变长字符串,n描述了可以存储的最大字节长度,此数据类型是为兼容Oracle类型新增的,行为和VARCHAR(n)一致。 | 10MB | -| NVARCHAR2(n) | 变长字符串,n描述了可以存储的最大字节长度。 | 10MB | -| TEXT | 不限长度(不超过1GB-8203字节)变长字符串。 | 1GB-8203字节 | diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md deleted file mode 100644 index 9dcd2ba1..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/table-design.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: 表设计 -summary: 表设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 表设计 - -MogDB是分布式架构。数据分布在各个DN上。总体上讲,良好的表设计需要遵循以下原则: - -- 【关注】将表数据均匀分布在各个DN上。数据均匀分布,可以防止数据在部分DN上集中分布,从而导致因存储倾斜造成数据库实例有效容量下降。通过选择合适的分布列,可以避免数据倾斜。 -- 【关注】将表的扫描压力均匀分散在各个DN上。避免扫描压力集中在部分DN上,而导致性能瓶颈。例如,在事实表上使用等值过滤条件时,将会导致扫描压力不均匀。 -- 【关注】减少需要扫描的数据量。通过分区表的剪枝机制可以大幅减少数据的扫描量。 -- 【关注】尽量减少随机I/O。通过聚簇/局部聚簇可以实现热数据的连续存储,将随机I/O转换为连续I/O,从而减少扫描的I/O代价。 -- 【关注】尽量避免数据shuffle。shuffle,是指在物理上,数据从一个节点,传输到另一个节点。shuffle占用了大量宝贵的网络资源,减小不必要的数据shuffle,可以减少网络压力,使数据的处理本地化,提高数据库实例的性能和可支持的并发度。通过对关联条件和分组条件的仔细设计,能够尽可能地减少不必要的数据shuffle。 - -## 选择存储方案 - -【建议】表的存储类型是表定义设计的第一步,客户业务类型是决定表的存储类型的主要因素,表存储类型的选择依据请参考表1。 - -**表 1** 表的存储类型及场景 - -| 存储类型 | 适用场景 | -| :------- | :----------------------------------------------------------- | -| 行存 | - 点查询(返回记录少,基于索引的简单查询)。
- 增、删、改操作较多的场景。 | -| 列存 | - 统计分析类查询(关联、分组操作较多的场景)。
- 即席查询(查询条件不确定,行存表扫描难以使用索引)。 | - -## 选择分布方案 - -【建议】表的分布方式的选择一般遵循以下原则: - -**表 2** 表的分布方式及使用场景 - -| 分布方式 | 描述 | 适用场景 | -| :---------- | :----------------------------------------------- | :----------------------------- | -| Hash | 表数据通过Hash方式散列到数据库实例中的所有DN上。 | 数据量较大的事实表。 | -| Replication | 数据库实例中每一个DN都有一份全量表数据。 | 维度表、数据量较小的事实表。 | -| Range | 表数据对指定列按照范围进行映射,分布到对应DN。 | 用户需要自定义分布规则的场景。 | -| List | 表数据对指定列按照具体值进行映射,分布到对应DN。 | 用户需要自定义分布规则的场景。 | - -## 选择分区方案 - -当表中的数据量很大时,应当对表进行分区,一般需要遵循以下原则: - -- 【建议】使用具有明显区间性的字段进行分区,比如日期、区域等字段上建立分区。 -- 【建议】分区名称应当体现分区的数据特征。例如,关键字+区间特征。 -- 【建议】将分区上边界的分区值定义为MAXVALUE,以防止可能出现的数据溢出。 - -典型的分区表定义如下: - -```sql -CREATE TABLE staffS_p1 -( - staff_ID NUMBER(6) not null, - FIRST_NAME VARCHAR2(20), - LAST_NAME VARCHAR2(25), - EMAIL VARCHAR2(25), - PHONE_NUMBER VARCHAR2(20), - HIRE_DATE DATE, - employment_ID VARCHAR2(10), - SALARY NUMBER(8,2), - COMMISSION_PCT NUMBER(4,2), - MANAGER_ID NUMBER(6), - section_ID NUMBER(4) -) -PARTITION BY RANGE (HIRE_DATE) -( - PARTITION HIRE_19950501 VALUES LESS THAN ('1995-05-01 00:00:00'), - PARTITION HIRE_19950502 VALUES LESS THAN ('1995-05-02 00:00:00'), - PARTITION HIRE_maxvalue VALUES LESS THAN (MAXVALUE) -); -``` - -## 选择分布键 - -Hash表的分布键选取至关重要,如果分布键选择不当,可能会导致数据倾斜,从而导致查询时,I/O负载集中在部分DN上,影响整体查询性能。因此,在确定Hash表的分布策略之后,需要对表数据进行倾斜性检查,以确保数据的均匀分布。分布键的选择一般需要遵循以下原则: - -- 【建议】选作分布键的字段取值应该比较离散,以便数据能在各个DN上均匀分布。当单个字段无法满足离散条件时,可以考虑使用多个字段一起作为分布键。一般情况下,可以考虑选择表的主键作为分布键。例如,在人员信息表中选择证件号码作为分布键。 -- 【建议】在满足第一条原则的情况下,尽量不要选取在查询中存在常量过滤条件的字段作为分布键。例如,在表dwcjk相关的查询中,字段zqdh存在常量过滤条件“zqdh='000001'”,那么就应当尽量不选择zqdh字段做为分布键。 -- 【建议】在满足前两条原则的情况,尽量选择查询中的关联条件为分布键。当关联条件作为分布键时,join任务的相关数据都分布在DN本地,将极大减少DN之间的数据流动代价。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md deleted file mode 100644 index b6d071ac..00000000 --- 
a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: 视图和关联表设计 -summary: 视图和关联表设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 视图和关联表设计 - -## 视图设计 - -- 【建议】除非视图之间存在强依赖关系,否则不建议视图嵌套。 -- 【建议】视图定义中尽量避免排序操作。 - -## 关联表设计 - -- 【建议】表之间的关联字段应该尽量少。 -- 【建议】关联字段的数据类型应该保持一致。 -- 【建议】关联字段在命名上,应该可以明显体现出关联关系。例如,采用同样名称来命名。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md deleted file mode 100644 index 797973d2..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/database-object-naming-conventions.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: 数据库对象命名 -summary: 数据库对象命名 -author: Guo Huan -date: 2021-10-14 ---- - -# 数据库对象命名 - -数据库对象命名需要满足约束:非时序表长度不超过63个字符,时序表长度不超过53个字符,以字母或下划线开头,中间字符可以是字母、数字、下划线、$、#。 - -- 【建议】避免使用保留或者非保留关键字命名数据库对象。 - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **说明:** 可以使用select * from pg_get_keywords()查询MogDB的关键字,或者在[关键字](2-keywords)章节中查看。 - -- 【建议】避免使用双引号括起来的字符串来定义数据库对象名称,除非需要限制数据库对象名称的大小写。数据库对象名称大小写敏感会使定位问题难度增加。 - -- 【建议】数据库对象命名风格务必保持统一。 - - - 增量开发的业务系统或进行业务迁移的系统,建议遵守历史的命名风格。 - - 建议使用多个单词组成,以下划线分割。 - - 数据库对象名称建议能够望文知意,尽量避免使用自定义缩写(可以使用通用的术语缩写进行命名)。例如,在命名中可以使用具有实际业务含义的英文词汇或汉语拼音,但规则应该在数据库实例范围内保持一致。 - - 变量名的关键是要具有描述性,即变量名称要有一定的意义,变量名要有前缀标明该变量的类型。 - -- 【建议】表对象的命名应该可以表征该表的重要特征。例如,在表对象命名时区分该表是普通表、临时表还是非日志表: - - - 普通表名按照数据集的业务含义命名。 - - 临时表以“tmp_+后缀”命名。 - - 非日志表以“ul_+后缀”命名。 - - 外表以“f_+后缀”命名。 - - 不创建以redis_为前缀的数据库对象。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md deleted file mode 100644 index 
782d0075..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: 开发设计建议概述 -summary: 开发设计建议概述 -author: Guo Huan -date: 2021-10-14 ---- - -# 开发设计建议概述 - -本开发设计建议约定数据库建模和数据库应用程序开发过程中,应当遵守的设计规范。依据这些规范进行建模,能够更好的契合MogDB的分布式处理架构,输出更高效的业务SQL代码。 - -本开发设计建议中所陈述的“建议”和“关注”含义如下: - -- **建议**:用户应当遵守的设计规则。遵守这些规则,能够保证业务的高效运行;违反这些规则,将导致业务性能的大幅下降或某些业务逻辑错误。 -- **关注**:在业务开发过程中客户需要注意的细则。用于标识容易导致客户理解错误的知识点(实际上遵守SQL标准的SQL行为),或者程序中潜在的客户不易感知的默认行为。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md deleted file mode 100644 index 046ede24..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/sql-compilation.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: SQL编写 -summary: SQL编写 -author: Guo Huan -date: 2021-10-14 ---- - -# SQL编写 - -## DDL - -- 【建议】在MogDB中,建议DDL(建表、comments等)操作统一执行,在批处理作业中尽量避免DDL操作。避免大量并发事务对性能的影响。 -- 【建议】在非日志表(unlogged table)使用完后,立即执行数据清理(truncate)操作。因为在异常场景下,MogDB不保证非日志表(unlogged table)数据的安全性。 -- 【建议】临时表和非日志表的存储方式建议和基表相同。当基表为行存(列存)表时,临时表和非日志表也推荐创建为行存(列存)表,可以避免行列混合关联带来的高计算代价。 -- 【建议】索引字段的总长度不超过50字节。否则,索引大小会膨胀比较严重,带来较大的存储开销,同时索引性能也会下降。 -- 【建议】不要使用DROP…CASCADE方式删除对象,除非已经明确对象间的依赖关系,以免误删。 - -## 数据加载和卸载 - -- 【建议】在insert语句中显式给出插入的字段列表。例如: - - ```sql - INSERT INTO task(name,id,comment) VALUES ('task1','100','第100个任务'); - ``` - -- 【建议】在批量数据入库之后,或者数据增量达到一定阈值后,建议对表进行analyze操作,防止统计信息不准确而导致的执行计划劣化。 - -- 【建议】如果要清理表中的所有数据,建议使用truncate table方式,不要使用delete table方式。delete table方式删除性能差,且不会释放那些已经删除了的数据占用的磁盘空间。 - -## 类型转换 - -- 【建议】在需要数据类型转换(不同数据类型进行比较或转换)时,使用强制类型转换,以防隐式类型转换结果与预期不符。 -- 【建议】在查询中,对常量要显式指定数据类型,不要试图依赖任何隐式的数据类型转换。 -- 【关注】若sql_compatibility参数设置为A,在导入数据时,空字符串会自动转化为NULL。如果需要保留空字符串需要sql_compatibility参数设置为C。 - -## 查询操作 - -- 【建议】除ETL程序外,应该尽量避免向客户端返回大量结果集的操作。如果结果集过大,应考虑业务设计是否合理。 - -- 
【建议】使用事务方式执行DDL和DML操作。例如,truncate table、update table、delete table、drop table等操作,一旦执行提交就无法恢复。对于这类操作,建议使用事务进行封装,必要时可以进行回滚。 - -- 【建议】在查询编写时,建议明确列出查询涉及的所有字段,不建议使用“SELECT *”这种写法。一方面基于性能考虑,尽量减少查询输出列;另一方面避免增删字段对前端业务兼容性的影响。 - -- 【建议】在访问表对象时带上schema前缀,可以避免因schema切换导致访问到非预期的表。 - -- 【建议】超过3张表或视图进行关联(特别是full join)时,执行代价难以估算。建议使用WITH TABLE AS语句创建中间临时表的方式增加SQL语句的可读性。 - -- 【建议】尽量避免使用笛卡尔积和Full join。这些操作会造成结果集的急剧膨胀,同时其执行性能也很低。 - -- 【关注】NULL值的比较只能使用IS NULL或者IS NOT NULL的方式判断,其他任何形式的逻辑判断都返回NULL。例如:NULL<>NULL、NULL=NULL和NULL<>1返回结果都是NULL,而不是期望的布尔值。 - -- 【关注】需要统计表中所有记录数时,不要使用count(col)来替代count(*)。count(*)会统计NULL值(真实行数),而count(col)不会统计。 - -- 【关注】在执行count(col)时,将“值为NULL”的记录行计数为0。在执行sum(col)时,当所有记录都为NULL时,最终将返回NULL;当不全为NULL时,“值为NULL”的记录行将被计数为0。 - -- 【关注】count(多个字段)时,多个字段名必须用圆括号括起来。例如,count( (col1,col2,col3) )。注意:通过多字段统计行数时,即使所选字段都为NULL,该行也被计数,效果与count(*)一致。 - -- 【关注】count(distinct col)用来计算该列不重复的非NULL的数量,NULL将不被计数。 - -- 【关注】count(distinct (col1,col2,…))用来统计多列的唯一值数量,当所有统计字段都为NULL时,也会被计数,同时这些记录被认为是相同的。 - -- 【建议】使用连接操作符“||”替换concat函数进行字符串连接。因为concat函数生成的执行计划不能下推,导致查询性能严重劣化。 - -- 【建议】使用下面时间相关的宏替换now函数来获取当前时间。因为now函数生成的执行计划无法下推,导致查询性能严重劣化。 - - **表 1** 时间相关的宏 - - | **宏名称** | **描述** | **示例** | - | :------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | - | CURRENT_DATE | 获取当前日期,不包含时分秒。 | `mogdb=# select CURRENT_DATE; date ----- 2018-02-02 (1 row)` | - | CURRENT_TIME | 获取当前时间,不包含年月日。 | `mogdb=# select CURRENT_TIME; timetz -------- 00:39:34.633938+08 (1 row)` | - | CURRENT_TIMESTAMP(n) | 获取当前日期和时间,包含年月日时分秒。
说明:
n表示存储的毫秒位数。 | `mogdb=# select CURRENT_TIMESTAMP(6); timestamptz ----------- 2018-02-02 00:39:55.231689+08 (1 row)` | - -- 【建议】尽量避免标量子查询语句的出现。标量子查询是出现在select语句输出列表中的子查询,在下面例子中,下划线部分即为一个标量子查询语句: - - ```sql - SELECT id, (SELECT COUNT(*) FROM films f WHERE f.did = s.id) FROM staffs_p1 s; - ``` - - 标量子查询往往会导致查询性能急剧劣化,在应用开发过程中,应当根据业务逻辑,对标量子查询进行等价转换,将其写为表关联。 - -- 【建议】在where子句中,应当对过滤条件进行排序,把选择读较小(筛选出的记录数较少)的条件排在前面。 - -- 【建议】where子句中的过滤条件,尽量符合单边规则。即把字段名放在比较条件的一边,优化器在某些场景下会自动进行剪枝优化。形如col op expression,其中col为表的一个列,op为‘=’、‘>’的等比较操作符,expression为不含列名的表达式。例如, - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data WHERE current_timestamp(6) - time < '1 days'::interval; - ``` - - 改写为: - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data where time > current_timestamp(6) - '1 days'::interval; - ``` - -- 【建议】尽量避免不必要的排序操作。排序需要耗费大量的内存及CPU,如果业务逻辑许可,可以组合使用order by和limit,减小资源开销。MogDB默认按照ASC & NULL LAST进行排序。 - -- 【建议】使用ORDER BY子句进行排序时,显式指定排序方式(ASC/DESC),NULL的排序方式(NULL FIRST/NULL LAST)。 - -- 【建议】不要单独依赖limit子句返回特定顺序的结果集。如果部分特定结果集,可以将ORDER BY子句与Limit子句组合使用,必要时也可以使用offset跳过特定结果。 - -- 【建议】在保障业务逻辑准确的情况下,建议尽量使用UNION ALL来代替UNION。 - -- 【建议】如果过滤条件只有OR表达式,可以将OR表达式转化为UNION ALL以提升性能。使用OR的SQL语句经常无法优化,导致执行速度慢。例如,将下面语句 - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) OR (cdp= 301 AND inline=302) OR (cdp= 302 ANDinline=301); - ``` - - 转换为: - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) - union all - SELECT * FROM scdc.pub_menu - WHERE (cdp= 301 AND inline=302) - union all - SELECT * FROM tablename - WHERE (cdp= 302 AND inline=301) - ``` - -- 【建议】当in(val1, val2, val3…)表达式中字段较多时,建议使用in (values (va11), (val2),(val3)…)语句进行替换。优化器会自动把in约束转换为非关联子查询,从而提升查询性能。 - -- 【建议】在关联字段不存在NULL值的情况下,使用(not) exist代替(not) in。例如,在下面查询语句中,当T1.C1列不存在NULL值时,可以先为T1.C1字段添加NOT NULL约束,再进行如下改写。 - - ```sql - SELECT * FROM T1 WHERE T1.C1 NOT IN (SELECT T2.C2 FROM T2); - ``` - - 可以改写为: - - ```sql - SELECT * FROM T1 WHERE NOT 
EXISTS (SELECT * FROM T1,T2 WHERE T1.C1=T2.C2); - ``` - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **说明:** - > - > - 如果不能保证T1.C1列的值为NOT NULL的情况下,就不能进行上述改写。 - > - 如果T1.C1为子查询的输出,要根据业务逻辑确认其输出是否为NOT NULL。 - -- 【建议】通过游标进行翻页查询,而不是使用LIMIT OFFSET语法,避免多次执行带来的资源开销。游标必须在事务中使用,执行完后务必关闭游标并提交事务。 diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md b/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md deleted file mode 100644 index 7ef8ae23..00000000 --- a/product/zh/docs-mogdb/v2.1/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: JDBC配置 -summary: JDBC配置 -author: Guo Huan -date: 2021-10-14 ---- - -# JDBC配置 - -目前,MogDB相关的第三方工具都是通过JDBC进行连接的,此部分将介绍工具配置时的注意事项。 - -## 连接参数 - -- 【关注】第三方工具通过JDBC连接MogDB时,JDBC向MogDB发起连接请求,会默认添加以下配置参数,详见JDBC代码ConnectionFactoryImpl类的实现。 - - ``` - params = { - { "user", user }, - { "database", database }, - { "client_encoding", "UTF8" }, - { "DateStyle", "ISO" }, - { "extra_float_digits", "2" }, - { "TimeZone", createPostgresTimeZone() }, - }; - ``` - - 这些参数可能会导致JDBC客户端的行为与gsql客户端的行为不一致,例如,Date数据显示方式、浮点数精度表示、timezone显示。 - - 如果实际期望和这些配置不符,建议在java连接设置代码中显式设定这些参数。 - -- 【建议】通过JDBC连接数据库时,应该保证下面三个时区设置一致: - - - JDBC客户端所在主机的时区。 - - - MogDB数据库实例所在主机的时区。 - - - MogDB数据库实例配置过程中时区。 - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **说明:** - > 时区设置相关的操作,请参考《安装指南》中“[设置时区和时间](3-modifying-os-configuration#设置时区和时间)“部分内容。 - -## fetchsize - -【关注】在应用程序中,如果需要使用fetchsize,必须关闭autocommit。开启autocommit,会令fetchsize配置失效。 - -## autocommit - -【建议】在JDBC向MogDB申请连接的代码中,建议显式打开autocommit开关。如果基于性能或者其它方面考虑,需要关闭autocommit时,需要应用程序自己来保证事务的提交。例如,在指定的业务SQL执行完之后做显式提交,特别是客户端退出之前务必保证所有的事务已经提交。 - -## 释放连接 - -【建议】推荐使用连接池限制应用程序的连接数。每执行一条SQL就连接一次数据库,是一种不好SQL的编写习惯。 - -【建议】在应用程序完成作业任务之后,应当及时断开和MogDB的连接,释放资源。建议在任务中设置session超时时间参数。 
- -【建议】使用JDBC连接池,在将连接释放给连接池前,需要执行以下操作,重置会话环境。否则,可能会因为历史会话信息导致的对象冲突。 - -- 如果在连接中设置了GUC参数,那么在将连接归还连接池之前,必须使用“SET SESSION AUTHORIZATION DEFAULT;RESET ALL;”将连接的状态清空。 -- 如果使用了临时表,那么在将连接归还连接池之前,必须将临时表删除。 - -## CopyManager - -【建议】在不使用ETL工具,数据入库实时性要求又比较高的情况下,建议在开发应用程序时,使用MogDB JDBC驱动的copyManger接口进行微批导入。 diff --git a/product/zh/docs-mogdb/v2.1/toc.md b/product/zh/docs-mogdb/v2.1/toc.md index d5009fe8..8a3ee2c2 100644 --- a/product/zh/docs-mogdb/v2.1/toc.md +++ b/product/zh/docs-mogdb/v2.1/toc.md @@ -191,18 +191,6 @@ + [WDR解读指南](/performance-tuning/wdr-snapshot-schema.md) + [TPCC性能优化指南](/performance-tuning/TPCC-performance-tuning-guide.md) + 开发者指南 - + 开发设计建议 - + [概述](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [数据库对象命名](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + 数据库对象设计 - + [Database和Schema设计](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [表设计](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [字段设计](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [约束设计](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [视图和关联表设计](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + 工具对接 - + [JDBC配置](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL编写](/developer-guide/development-and-design-proposal/sql-compilation.md) + 应用程序开发教程 + [开发规范](/developer-guide/dev/1-development-specifications.md) + 基于JDBC开发 diff --git a/product/zh/docs-mogdb/v2.1/toc_dev.md b/product/zh/docs-mogdb/v2.1/toc_dev.md index 723b6975..35476545 100644 --- a/product/zh/docs-mogdb/v2.1/toc_dev.md +++ b/product/zh/docs-mogdb/v2.1/toc_dev.md @@ -4,18 +4,6 @@ ## 开发者指南 -+ 开发设计建议 - + 
[概述](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [数据库对象命名](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + 数据库对象设计 - + [Database和Schema设计](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [表设计](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [字段设计](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [约束设计](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [视图和关联表设计](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + 工具对接 - + [JDBC配置](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL编写](/developer-guide/development-and-design-proposal/sql-compilation.md) + 应用程序开发教程 + [开发规范](/developer-guide/dev/1-development-specifications.md) + 基于JDBC开发 diff --git a/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md b/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md index 48345360..d2b89ecd 100644 --- a/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md +++ b/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md @@ -1,337 +1,135 @@ --- -title: MogDB 2.1 发行说明 -summary: MogDB 2.1 发行说明 +title: MogDB 3.0 发行说明 +summary: MogDB 3.0 发行说明 author: Guo Huan -date: 2021-12-06 +date: 2022-04-19 --- -# MogDB 2.1 发行说明 +# MogDB 3.0 发行说明 ## 1. 版本说明 -MogDB 2.1版本基于MogDB 2.0版本进一步增强,并合入了openGauss 2.1.0版本的新增特性。 +MogDB 3.0版本基于MogDB 2.1版本进一步增强,并合入了openGauss 3.0.0版本的新增特性。 -> 说明:MogDB 2.1是Preview版本,该版本生命周期为半年。 +
## 2. 新增特性 -### 2.1 集成openGauss 2.1.0版本新增特性 - -- 存储过程兼容性增强 -- SQL引擎能力增强 -- 支持Ustore存储引擎 -- 支持段页式存储 -- 基于Paxos分布式一致性协议的高可用 -- AI4DB和DB4AI竞争力持续构筑 -- 日志框架及错误码整改 -- JDBC客户端负载均衡及读写分离 -- 支持cmake脚本编译 -- 列存表支持主键唯一键约束 -- 支持jsonb数据类型 -- 支持unique sql自动淘汰 -- UCE故障感知 -- 支持GB18030字符集 -- 备机catch优化 -- 客户端工具gsql支持readline命令自动补齐 -- 动态数据脱敏 -- 支持国密算法 -- 防篡改账本数据库 -- 内置角色和权限管理机制 -- 透明加密 -- 全密态数据库增强 -- 支持dblink -- 支持Ubuntu系统 -- 支持Hash索引 -- upsert支持子查询 -- min/max函数支持ip地址类型 -- 增加array_remove/array_replace/first/last函数 -- Data Studio客户端工具适配内核特性 - -### 2.2 x86架构性能优化 - - 优化x86架构下的多核性能,TPC-C高并发下性能达到PostgreSQL 14的1.5-5倍,主要优化点有: - -- 支持NUMA绑核 -- 无锁WAL -- Cache友好数据结构 - -### 2.3 在线创建及重建索引 - -支持在执行create index和reindex index时指定CONCURRENTLY选项,以不阻塞DML语句执行的方式创建和重建索引,提升索引的可维护性。支持普通表上的索引和分区表全局索引的在线创建和重建。 - -相比于普通的创建和重建索引,在线创建和重建的方式可能需要更长的时间才能完成。 - -列存表上的索引、分区表本地索引和临时表上的索引不支持在线方式创建索引及重建。 - -**相关页面** - -- [CREATE INDEX](50-CREATE-INDEX#CONCURRENTLY) -- [REINDEX](117-REINDEX#CONCURRENTLY) - -### 2.4 增强Oracle兼容能力 - -#### 2.4.1 支持Orafce插件 - -> 说明:需要用户下载plugin包 ,并手工安装使用。 - -通过集成orafce插件,支持以下Oracle兼容语法: - -- SQL Queries - - DUAL table -- SQL Functions - - Mathematical functions - - BITAND - - COSH - - SINH - - TANH - - String functions - - INSTR - - LENGTH - - LENGTHB - - LPAD - - LTRIM - - NLSSORT - - REGEXP_COUNT - - REGEXP_INSTR - - REGEXP_LIKE - - REGEXP_SUBSTR - - REGEXP_REPLACE - - RPAD - - RTRIM - - SUBSTR - - SUBSTRB - - Date/time functions - - ADD_MONTHS - - DBTIMEZONE - - LAST_DAY - - MONTHS_BETWEEN - - NEXT_DAY - - ROUND - - SESSIONTIMEZONE - - SYSDATE - - TRUNC - - Data type formatting functions - - TO_CHAR - - TO_DATE - - TO_MULTI_BYTE - - TO_NUMBER - - TO_SINGLE_BYTE - - Conditional expressions - - DECODE - - LNNVL - - NANVL - - NVL - - NVL2 - - Aggregate functions - - LISTAGG - - MEDIAN - - Functions that return internal information - - DUMP -- SQL Operators - - Datetime operator -- Packages - - DBMS_ALERT - - DBMS_ASSERT - - DBMS_OUTPUT - - DBMS_PIPE - - DBMS_RANDOM - - DBMS_UTILITY - - UTL_FILE 
- -**相关页面** - -- [orafce](orafce-user-guide) - -#### 2.4.2 支持connect by语法 - -提供兼容oracle的connect by语法,实现层级结构的数据查询控制,并展示等级、循环、起始层级等。 - -提供oracle兼容的层级查询功能,可以按照指定的连接关系,起始条件等按照树状结构展示数据内容,数据层级,路径等。 - -通过start with条件指定层级查询的根行,根据这些行进行递归查询来获取所有子行,以及子行的子行等。 - -通过connect by条件指定层级间父行与子行关系,来确定每一行的满足条件的所有子行。 - -如果存在连接,无论是连接语句,还是from或where子句中,先获取连接后的结果集,然后再进行层级查询。 - -语句中如果存在where过滤条件,先执行层级查询后再将结果集进行过滤,而不是过滤掉不满足的行以及其所有子行。 - -可以通过level伪列查看该行所在层级,sys_connect_by_path查看从根行到该行的路径,以及connect_by_root查看根行等辅助功能。 - -**相关页面** - -- [CONNECT BY](139-CONNECT-BY) - -#### 2.4.3 可更新视图 - -支持可更新视图,用户可以对视图进行Insert/Update/Delete操作,更新操作会直接作用到视图对应基表。 - -不是所有视图都可更新,视图中的行与基表中的行必须要是一一对应关系,即视图的内容不能是基于聚合或窗口函数创建而来。 - -对于多表连接的视图,如果某一个基表的主键(唯一键)可以作为视图的主键(唯一键),则该视图也支持更新,更新结果作用到主键来源的基表。 - -**相关页面** - -- [支持updatable-views](overview-of-system-catalogs-and-system-views#支持updatable-views) +### 2.1 集成openGauss 3.0.0版本新增特性 -#### 2.4.4 重建视图时变更列 +- 行存转向量化 +- 延迟进入最大可用模式 +- 并行逻辑解码 +- CM(Cluster Manager) +- global syscache +- 发布订阅 +- 外键锁增强 +- 行存表压缩 +- Data Studio工具开源 +- MySQL到openGauss的迁移工具chameleon +- 支持使用中间件shardingSphere构建分布式数据库 +- 支持kubernetes部署分布式数据库 +- 支持ANY权限管理 +- DBMind组件化 +- 库内AI算法支持XGBoost、multiclass和PCA -在视图重建时,支持减少列、更改列名操作,本命令仅对非物化视图有效。 +### 2.2 Cluster Manager (CM) -**相关页面** +- 提供了数据库主备的状态监控、网络通信故障监控、文件系统故障监控能力; +- 提供了故障时自动主备切换能力; +- 使用Paxos算法来进行多数派投票,选主; +- 要求至少有三台服务器安装CM组件; +- 数据库服务器可以是一主一备两台机器。 -- [CREATE VIEW](70-CREATE-VIEW#replace) +### 2.3 性能增强 -#### 2.4.5 支持systimestamp函数 +#### 2.3.1 事务异步提交 -返回数据库所在服务器的当前系统日期和时间,以及时区信息。 +- 将事务执行和事务日志落盘拆分为CPU bound和IO bound两个阶段,分别由不同线程执行,避免执行IO操作时,CPU资源闲置,进而提升CPU资源利用率; +- 事务异步提交的优化,可以让事务吞吐量提升20%-50%,TPCC整体性能提升10%~20%; -**相关页面** +#### 2.3.2 日志持久化优化 -- [时间和日期处理函数和操作符](8-date-and-time-processing-functions-and-operators#systimestamp) +- 提高高数据更新负载下执行性能,降低执行延迟。 -#### 2.4.6 支持sys_guid函数 +#### 2.3.3 索引并行创建并行度定义 -系统根据当前时间和机器码,生成并返回一个16字节的全局唯一标识符。 +- MogDB额外提供了参数控制并行度,可以手动制定并行度,更加灵活 -**相关页面** +#### 2.3.4 COPY导入SIMD加速 -- [系统信息函数](23-system-information-functions#sys_guid) +- 
利用CPU的指令集,对COPY命令中的数据解析阶段进行加速,进而提升COPY导入性能;(目前仅限x86 CPU) -### 2.5 新增支持PostgreSQL插件 +#### 2.3.5 动态分区裁剪 -> 说明:需要用户下载plugin包 ,并手工安装使用。 +- 新增支持了动态分区裁减。在prepare-execute执行方式,以及分区约束表达式中包含子查询的场景下,在执行阶段根据参数或子查询结果对分区进行裁减,提升分区表查询性能; -- [pg_repack插件](pg_repack-user-guide):通过触发器机制,提供在线重建表的功能,主要用于在线缩减表中的空闲空间大小。 -- [wal2json插件](wal2json-user-guide):通过逻辑复制机制,以json形态提供持续的数据变更内容,主要用于异构复制等情况。 -- [pg_trgm插件](pg_trgm-user-guide):实现trgm分词算法,实现更好的全文检索能力。 -- [pg_prewarm插件](pg_prewarm-user-guide):将指定的数据表预先缓存到共享内存中,加快数据的访问速度。 -- [pg_bulkload插件](pg_bulkload-user-guide):不经过共享内存直接加载数据到数据文件中,加快了数据库批量导入的速度。 +### 2.4 故障诊断 -### 2.6 支持读扩展 +#### 2.4.1 监控Session级别SQL运行状态 -> 说明:配套ShardingSphere 5.1.0及后续版本,需要用户手工下载安装。 +- 对Session级别SQL运行状态进行收集执行计划树并动态采样执行算子 -MogDB通过集成ShardingSphere的Proxy来支持读扩展能力: +#### 2.4.2 OM故障诊断能力增强 -- 读写事务自动路由到主库执行,只读事务自动路由到备库执行;在有更高读一致性要求场景下,也可以通过hint控制只读事务也路由到主库执行; +- gstrace增强:通过增加模块切换(component switch)来获得更有针对性的执行路径,用于提升debug效率。 +- gs_check增强:原有的场景检查基础上,实现检测结果保存,以及对不同时间做的两个检测结果进行差异比较。 +- gs_watch:当MogDB发生故障时,使用此工具收集OS信息、日志信息以及配置文件等信息,来定位问题。 +- gs_gucquery:实现MogDB GUC值自动收集整理导出和差异比较。 -- 支持自动识别并配置读写节点,无需配置主备角色,在配置列表中自动发现主备库; +### 2.5 兼容性增强 -- 支持切换后自动识别主备角色,无需额外操作,自动识别新的主备角色并正确路由; +#### 2.5.1 Oracle兼容增强 -- 支持备节点的自动负载均衡:当备库宕机恢复或者新备库加入时,在备库复制状态正常后,会自动加入到读负载均衡中; +- 更多函数支持,更多内置包支持:dbms_random, dbms_lob, dbms_metadata等 +- 支持connect by语法 +- 降低Oracle应用迁移到MogDB的代码修改量。 -### 2.7 其他 +#### 2.5.2 MySQL兼容增强 -- nlssort函数支持GBK字符集生僻字按拼音排序 +- 更多语法支持:timestamp on update等;更多数据类型兼容;更多函数兼容 +- 降低迁移MySQL应用到MogDB的代码修改量。 - **相关页面**:[SELECT](125-SELECT#nlssort) +#### 2.5.3 PostgreSQL兼容增强 -- ALTER SEQUENCE支持修改increment +##### 2.5.3.1 新增BRIN INDEX (PostgreSQL 9.5开始支持) - **相关页面**:[ALTER SEQUENCE](16-ALTER-SEQUENCE#increment) +- 数据块范围的索引,相比于精准的BTREE索引,BRIN INDEX提供了一个以较小空间消耗获得一个相对较快查询速度的平衡 +- 1GB的表,无索引,查询单条4s;BTREE索引200MB空间,查询4ms;BRIN索引800K,查询58ms; -- 对于TIMESTAMP WITH TIME ZONE类型,可以在TO_CHAR使用TZH,TZM,TZD,TZR参数来输出时区信息 +##### 2.5.3.2 新增BLOOM INDEX(PostgreSQL 9.6开始支持) - 
**相关页面**:[类型转换函数](9-type-conversion-functions#to_char) +- 布隆过滤:真的不一定为真,假的一定为假;存在误算率,需要recheck(算法实现,不是要用户recheck) +- 适用于表中拥有大量字段,而且查询条件也可能会使用大量字段的组合;仅支持等值查询 +- 普通索引应对此类场景,需要创建多个索引,对于空间占用和插入更新速度都会有较大影响 +- 此时可以在所有这些可能用于查询的字段上统一创建一个BLOOM索引,获得空间和查询速度的平衡,10GB表的扫描可以1s左右完成 -### 2.8 Preview特性 - -> 说明:Preview特性需要通过以下方式手动开启。 -> -> ```sql -> alter system set enable_poc_feature = on; -> -- 或 -> alter system set enable_poc_feature to on; -> -- 或在MogDB数据目录下的postgresql.conf配置文件中加入enable_poc_feature = on -> -- 重启后生效 -> ``` - -#### 2.8.1 行存表压缩 - -支持在创建行存表(astore)时指定是否为压缩表,对于开启压缩的行存表,系统自动对表数据进行压缩,以节省存储空间。当往压缩表写入数据时,系统自动根据每列数据的特征选择合适的压缩算法,用户也可以直接指定每一列使用的压缩算法。 - -实际压缩率和数据内容强相关,典型场景下可达到50%的压缩率;开启压缩特性后有一定性能损耗,典型TPC-C模型下性能损耗在5%以内,实际性能影响取决于实际系统负载情况。 - -对于非压缩表,也可通过Alter Table的方式修改为压缩表,对于后续新写入的数据将自动进行压缩。 - -**相关页面** - -- [CREATE TABLE](60-CREATE-TABLE#COMPRESSION) -- [ALTER TABLE](22-ALTER-TABLE#COMPRESS) - -#### 2.8.2 二级分区 - -支持创建二级分区表,数据自动按照分区方式进行分区存储,以提升大数据量下的存储和查询效率,支持的二级分区组合有: - -- List-List -- List-Range -- List-Hash -- Range-List -- Range-Range -- Range-Hash - -支持对单个Partition和SubPartition进行查询; - -支持对Partition Key,SubPartition Key或其组合条件进行分区裁剪,进一步优化分区查询效率; - -支持对分区表或一级分区进行truncate,vacuum 操作; - -Update操作时,支持数据跨分区移动(不支持Partition/SubPartition Key为List或Hash分区类型); - -支持对二级分区的备份恢复。 - -**相关页面** - -- [CREATE TABLE SUBPARTITION](62.1-CREATE-TABLE-SUBPARTITION) -- [ALTER TABLE SUBPARTITION](23.1-ALTER-TABLE-SUBPARTITION) +##### 2.5.3.3 降低了使用此类索引的PostgreSQL应用迁移到MogDB的难度
## 3. 修复缺陷 -### 3.1 集成openGauss 2.1.0版本修复缺陷 - -- [I435UP](https://gitee.com/opengauss/openGauss-server/issues/I435UP) explain语句执行报错 -- [I44QS6](https://gitee.com/opengauss/openGauss-server/issues/I44QS6) 执行函数select get_local_active_session() limit 1 ;数据库挂掉 -- [I4566H](https://gitee.com/opengauss/openGauss-server/issues/I4566H) 分区表拆分分区update global index 后查询结果与master版本不一致 -- [I45822](https://gitee.com/opengauss/openGauss-server/issues/I45822) 全局临时表的GPC全局计划缓存信息查询存在问题 -- [I442TY](https://gitee.com/opengauss/openGauss-server/issues/I442TY) PITR指定时间戳恢复失败 -- [I45T7A](https://gitee.com/opengauss/openGauss-server/issues/I45T7A) 环境变量分离方式安装的数据库,远程备份异常 -- [I464G5](https://gitee.com/opengauss/openGauss-server/issues/I464G5) gs_ctl build备机重建指定非实例目录失败报错信息不一致 -- [I45TTB](https://gitee.com/opengauss/openGauss-server/issues/I45TTB) file_fdw目前不支持的文件类型创建外表成功,未进行相应报错 -- [I491CN](https://gitee.com/opengauss/openGauss-server/issues/I491CN) cidr类型的网络地址子网掩码为32时,在函数中调用max时报 -- [I496VN](https://gitee.com/opengauss/openGauss-server/issues/I496VN) 备机xlog大量堆积后,修正归档地址,归档失败 -- [I49HRV](https://gitee.com/opengauss/openGauss-server/issues/I49HRV) 开启备机归档,备机归档慢,switchover后新主机异常慢 -- [I492W4](https://gitee.com/opengauss/openGauss-server/issues/I492W4) om安装的数据库,进行mysql_fdw和oracle_fdw外表相关操作数据库core掉 -- [I498QT](https://gitee.com/opengauss/openGauss-server/issues/I498QT) 最大可用模式,同步备参数为ANY2情况下,主机持续压力的情况下,kill-9停止其中一个同步备,主机事务阻塞2s -- [I49L15](https://gitee.com/opengauss/openGauss-server/issues/I49L15) 开启2备机归档,减容+扩容其中一个节点,另一个节点归档异常 -- [I43MTG](https://gitee.com/opengauss/openGauss-server/issues/I43MTG) 新增函数开发者指南无相关资料 -- [I42YW8](https://gitee.com/opengauss/openGauss-server/issues/I42YW8) upsert子查询相关资料未补充 -- [I45WDH](https://gitee.com/opengauss/openGauss-server/issues/I45WDH) file_fdw,不支持fixed格式,在开发者指南支持类型应该去掉相关说明 -- [I484J0](https://gitee.com/opengauss/openGauss-server/issues/I484J0) gs_initdb -T参数未增加校验,且按照指导手册设置后取值不正确 -- [I471CS](https://gitee.com/opengauss/openGauss-server/issues/I471CS) 
数据库pgxc_node_name存在‘-’字符时,数据库异常退出,存在残留临时表未清理干净时,无法自动清理和vacuum -- [I40QM1](https://gitee.com/opengauss/openGauss-server/issues/I40QM1) 执行gs_basebackup过程中备节点异常,gs_basebackup进程阻塞无法退出 -- [I3RTQK](https://gitee.com/opengauss/openGauss-server/issues/I3RTQK) 备节点使用gs_basebackup备份失败,提示could not fetch mot checkpoint info: , status:7 - -### 3.2 MogDB 2.1.0版本修复缺陷 - -- gs_probackup恢复数据库时提示which doesn't support recovery_target_lsn -- Statement_history表无法清理 -- 模式级联删除操作引起数据库异常宕机 -- gsql中\d无法查询到同义词对应的表或视图的字段信息 -- lengthb函数不支持blob等大对象字段 -- 在开启sha256验证以后原先md5加密的用户仍然可以通过md5验证成功登陆 -- MogDB中嵌套存储过程内部的raise输出过于细节 - -### 3.3 MogDB 2.1.1版本修复缺陷 - -MogDB 2.1.1是MogDB 2.1.0的补丁版本,于2022年3月22日发布,在MogDB 2.1.0的基础上修复如下内容: - -- 修复pg_encoding_to_char()函数由于参数溢出导致的coredump的缺陷 -- 修复connect by语句作为查询子句时产生的coredump的缺陷 -- 修复在x86平台上connect by语句order by level查询数据顺序不一致的缺陷 +### 3.1 集成openGauss 3.0.0版本修复缺陷 + +- [I4VUXG](https://gitee.com/opengauss/openGauss-server/issues/I4VUXG?from=project-issue) 修复unlogged table 数据丢失问题 +- [I4SF5P](https://gitee.com/opengauss/openGauss-server/issues/I4SF5P?from=project-issue) release版本编译安装数据库,且dblink模块编译安装后,create extension dblink导致数据库core +- [I4S74D](https://gitee.com/opengauss/openGauss-server/issues/I4S74D?from=project-issue) 使用Jmeter工具向行存压缩表插入数据,数据量1G以上时必现失败(5/5),compresstype=2 +- [I4N81J](https://gitee.com/opengauss/openGauss-server/issues/I4N81J?from=project-issue) update/delete操作无法同步到订阅端 +- [I4YPJQ](https://gitee.com/opengauss/openGauss-server/issues/I4YPJQ?from=project-issue) Inserting varchar constant into MOT table using JDBC fails +- [I4PF6G](https://gitee.com/opengauss/openGauss-server/issues/I4PF6G?from=project-issue) 外键锁增强-2.0.0.灰度升级至2.2.0不提交,执行tpcc失败 +- [I4WPD1](https://gitee.com/opengauss/openGauss-server/issues/I4WPD1?from=project-issue) 简化安装模块获取安装包后解压openGauss-2.1.0-CentOS-64bit.tar.bz2缺少simpleinstall目录 无法执行极简安装 +- [I4L268](https://gitee.com/opengauss/openGauss-server/issues/I4L268?from=project-issue) 分区表多次truncate后,再进行vacuum freeze 
pg\_partition,系统表pg\_partition索引不准确 +- [I3HZJN](https://gitee.com/opengauss/openGauss-server/issues/I3HZJN?from=project-issue) copy命令DATE\_FORMAT缺少时分秒时,未按格式复制 +- [I4HUXD](https://gitee.com/opengauss/openGauss-server/issues/I4HUXD?from=project-issue) jsonb类型查询报错 +- [I4QDN9](https://gitee.com/opengauss/openGauss-server/issues/I4QDN9?from=project-issue) select 1.79E +308\*2,cume\_dist\(\) over\(order by 1.0E128\*1.2\)返回超出范围 +- [I4PAVO](https://gitee.com/opengauss/openGauss-server/issues/I4PAVO?from=project-issue) start with connect by record子查询识别失败 +- [I4UY9A](https://gitee.com/opengauss/openGauss-server/issues/I4UY9A?from=project-issue) opengauss列表分区创建default分区失败 +- [I4W3UB](https://gitee.com/opengauss/openGauss-server/issues/I4W3UB?from=project-issue) 创建并使用自定义类型创建视图,重命名该自定义类型后,无法获取视图定义 +- [I4WRMX](https://gitee.com/opengauss/openGauss-server/issues/I4WRMX?from=project-issue) 重启数据库且enable\_stmt\_track参数关闭时,查询statement\_history表记录应该无记录,实际有记录,statement\_history表的数据未清空 +- [I4WOBH](https://gitee.com/opengauss/openGauss-server/issues/I4WOBH?from=project-issue) GUC设置pagewriter\_sleep为360000后恢复默认值2000,重启库失败 + +
## 4. 兼容性 @@ -344,4 +142,4 @@ MogDB 2.1.1是MogDB 2.1.0的补丁版本,于2022年3月22日发布,在MogDB | openEuler 20.03LTS | ARM(鲲鹏)、X86_64(Intel,AMD,海光,兆芯) | | 银河麒麟V10 | ARM(鲲鹏)、X86_64(Intel,AMD,海光,兆芯) | | 统信UOS V20-D / V20-E | ARM(鲲鹏)、X86_64(Intel,AMD,海光,兆芯) | -| 统信UOS V20-A | X86_64(Intel,AMD,海光,兆芯) | +| 统信UOS V20-A | X86_64(Intel,AMD,海光,兆芯) | \ No newline at end of file diff --git a/product/zh/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md b/product/zh/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md index f3c2560d..89214971 100644 --- a/product/zh/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md +++ b/product/zh/docs-mogdb/v3.0/administrator-guide/importing-and-exporting-data/importing-data/3-running-the-COPY-FROM-STDIN-statement-to-import-data.md @@ -88,7 +88,7 @@ throws SQLException | nodeid | integer | 报错节点编号。 | | begintime | timestamp with time zone | 出现数据格式错误的时间。 | | filename | character varying | 出现数据格式错误的数据源文件名。 | - | rownum | bigint | 在数据源文件中,出现数据格式错误的行号。 | + | rownum | numeric | 在数据源文件中,出现数据格式错误的行号。 | | rawrecord | text | 在数据源文件中,出现数据格式错误的原始记录。 | | detail | text | 详细错误信息。 | diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md b/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md index ac9277d2..1bb81b51 100644 --- a/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md +++ b/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md @@ -20,7 +20,7 @@ date: 2021-04-27 尽管ISO针对SQL已经发布SQL-92、SQL:1999、SQL:2006等标准,但由于不同数据库自身的特性,使得同样功能在各自产品的实现上不尽相同,这也使得相关的语法规则各有千秋。因此,在制定具体开发规范的时候,需要针对不同数据库来编写相应的规范。 -本规范强调实用性、可操作性,根据开发人员在编码过程中常见的问题和易犯的错误,对代码编写的各方面均进行了详细明确的规范和约束。主要包括下列内容: 
+本规范强调实用性、可操作性,根据开发人员在编码过程中常见的问题和易犯的错误,对代码编写的各方面均进行了详细明确的规范和约束。主要包括下列内容: - 命名规范 @@ -34,7 +34,7 @@ date: 2021-04-27 - 常用函数 -除此之外,对规范的每条细则均给出具体的范例。 +除此之外,对规范的每条细则均给出具体的范例。 ### 适用范围 @@ -58,11 +58,11 @@ date: 2021-04-27 - 禁止使用保留字,保留关键字参考官方文档。 -- table能包含的column数目,根据字段类型的不同,数目在 250 到 1600 之间 +- table能包含的column数目,根据字段类型的不同,数目在 250 到 1600 之间 ### 临时及备份对象命名 -- 临时或备份的数据库对象名,如table,建议添加日期, 如dba.trade_record_2020_12_08 (其中dba 为DBA专用schema,trade_record为表名,2020_12_08为备份日期)。 +- 临时或备份的数据库对象名,如table,建议添加日期,如dba.trade_record_2020_12_08 (其中dba 为DBA专用schema,trade_record为表名,2020_12_08为备份日期)。 ### 表空间命名 @@ -100,15 +100,15 @@ SELECT 1 ### 变量命名 -- 命名应该使用英文单词,避免使用拼音,特别不应该使用拼音简写。命名不允许使用中文或者特殊字符。 +- 命名应该使用英文单词,避免使用拼音,特别不应该使用拼音简写。命名不允许使用中文或者特殊字符。 -- 如果不涉及复杂运算,一律用number定义计数等简单应用。 +- 如果不涉及复杂运算,一律用number定义计数等简单应用。 ### 分区表命名 - 分区表的表名遵循普通表的正常命名规则。 -- 按时间范围分区(每月一个分区),分区名字为PART_YYYYMM。 +- 按时间范围分区(每月一个分区),分区名字为PART_YYYYMM。 举例: PART_201901、PART_201902 @@ -167,13 +167,11 @@ func_addgroup(增加一个群组) ### partition table设计 -- MogDB/openGauss数据库支持的分区表为范围分区表。 - - 分区表的个数不建议超过1000个。 - 主键或唯一索引必须要包含分区键。 -- 对于数据量比较大的表,根据表数据的属性进行分区,以得到较好的性能。 +- 对于数据量比较大的表,根据表数据的属性进行分区,以得到较好的性能。 - 普通表若要转成分区表,需要新建分区表,然后把普通表中的数据导入到新建的分区表中。因此在初始设计表时,请根据业务提前规划是否使用分区表。 @@ -364,13 +362,13 @@ DROP TABLESPACE - 建议可以采用数值类型的场合,则避免采用字符类型。 -- 建议可以采用varchar(N) 就避免采用char(N), 可以采用varchar(N) 就避免采用text,varchar。 +- 建议可以采用varchar(N) 就避免采用char(N),可以采用varchar(N) 就避免采用text,varchar。 - 只允许用char(N)、varchar(N)及text字符类型。 - MogDB/openGauss新建数据库默认兼容oracle,not null 约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 -- 建议使用timestamp with time zone(timestamptz),而不用timestamp without time zone。 +- 建议使用timestamp with time zone(timestamptz),而不用timestamp without time zone。 - 建议使用NUMERIC(precision, scale)来存储货币金额和其它要求精确计算的数值, 而不建议使用real, double precision。 @@ -388,11 +386,11 @@ DROP TABLESPACE - 每个table必须包含主键。 -- 建议不要用有业务含义的名称作为主键,比如身份证或者国家名称,尽管其是unique的。 +- 建议不要用有业务含义的名称作为主键,比如身份证或者国家名称,尽管其是unique的。 - 建议主键的一步到位的写法:id serial primary key 或id bigserial primary key。 -- 
建议内容系统中size较大的table主键的等效写法如下,便于后续维护。 +- 建议内容系统中size较大的table主键的等效写法如下,便于后续维护。 ```sql create table test(id serial not null ); @@ -411,7 +409,7 @@ create unique index CONCURRENTLY ON test (id); #### 非空列 -- 所有非空列须在建表之初明确标识“NOT NULL”,上线之后,不再变更。同时需注意 NULL 与空字符‘’的查询结果差别:null 值会转换为 NULL, 而空值无字符显示。 +- 所有非空列须在建表之初明确标识“NOT NULL”,上线之后,不再变更。同时需注意 NULL 与空字符‘’的查询结果差别:null 值会转换为 NULL,而空值无字符显示。 #### 检查约束 @@ -420,20 +418,20 @@ create unique index CONCURRENTLY ON test (id); ### index设计 - MogDB/openGauss 提供的index类型: 行存表支持的索引类型:btree(行存表缺省值)、gin、gist。列存表支持的索引类型:Psort(列存表缺省值)、btree、gin。 -- 建议create 或 drop index 时,加 CONCURRENTLY参数,这是个好习惯,达到与写入数据并发的效果,列存表、分区表和临时表不支持CONCURRENTLY方式创建索引。 -- 建议对于频繁update, delete的包含于index 定义中的column的table, 用create index CONCURRENTLY , drop index CONCURRENTLY的方式进行维护其对应index。 +- 建议create 或 drop index 时,加 CONCURRENTLY参数,这是个好习惯,达到与写入数据并发的效果,列存表、分区表和临时表不支持CONCURRENTLY方式创建索引。 +- 建议对于频繁update、delete的包含于index 定义中的column的table,用create index CONCURRENTLY , drop index CONCURRENTLY的方式进行维护其对应index。 - 建议用unique index 代替unique constraints便于后续维护。 - 建议对where 中带多个字段and条件的高频 query,参考数据分布情况,建多个字段的联合index。 - 每个表的index数量不能超过5个。 - 复合索引的建立需要进行仔细分析: - - 正确选择复合索引中的第一个字段,一般是选择性较好的且在where子句中常用的字段上; - - 复合索引的几个字段是否经常同时以AND方式出现在Where子句中?单字段查询是否极少甚至没有?如果是,则可以建立复合索引;否则考虑单字段索引; - - 如果复合索引中包含的字段经常单独出现在Where子句中,则分解为多个单字段索引; - - 如果既有单字段索引,又有以这个字段为首列的复合索引,一般可考虑删除单字段索引; + - 正确选择复合索引中的第一个字段,一般是选择性较好的且在where子句中常用的字段上; + - 复合索引的几个字段是否经常同时以AND方式出现在Where子句中?单字段查询是否极少甚至没有?如果是,则可以建立复合索引;否则考虑单字段索引; + - 如果复合索引中包含的字段经常单独出现在Where子句中,则分解为多个单字段索引; + - 如果既有单字段索引,又有以这个字段为首列的复合索引,一般可考虑删除单字段索引; - 复合索引第一个字段一般不使用时间字段,因为时间字段多用于范围扫描,而前面的字段使用范围扫描后,后续字段无法用于索引过滤。 - 复合索引字段个数不能超过4个。 -- 频繁DML(写次数明显超过读次数)的表,不要建立太多的索引。 +- 频繁DML(写次数明显超过读次数)的表,不要建立太多的索引。 - 无用的索引以及重复索引应删除,避免对执行计划及数据库性能造成负面影响。 @@ -441,17 +439,17 @@ create unique index CONCURRENTLY ON test (id); - 尽量使用简单视图,尽可能少使用复杂视图。 - 简单视图定义:数据来自单个表,且无分组(DISTINCT/GROUP BY)、无函数。 + 简单视图定义:数据来自单个表,且无分组(DISTINCT/GROUP BY)、无函数。 - 复杂视图定义:数据来自多个表,或有分组,有函数,表的个数不能超过3个。 + 
复杂视图定义:数据来自多个表,或有分组,有函数,表的个数不能超过3个。 - 尽量不要使用嵌套视图,如果必须使用,不能超过2层嵌套。 ### function设计 -- 函数必须检索数据库表记录或数据库其他对象,甚至修改(执行Insert、Delete、Update、Drop、Create等操作)数据库信息。 +- 函数必须检索数据库表记录或数据库其他对象,甚至修改(执行Insert、Delete、Update、Drop、Create等操作)数据库信息。 -- 如果某项功能不需要和数据库打交道,则不得通过数据库函数的方式实现。 +- 如果某项功能不需要和数据库打交道,则不得通过数据库函数的方式实现。 - 在函数中避免采用DML或DDL语句。 @@ -589,7 +587,7 @@ f ### 确保使用到所有变量和参数 -- 声明变量也会产生一定的系统开销,并会显得代码不够严谨,在编译时未使用的变量会有告警,需修改以确保没有任何告警。 +- 声明变量也会产生一定的系统开销,并会显得代码不够严谨,在编译时未使用的变量会有告警,需修改以确保没有任何告警。 ## Query操作 @@ -615,11 +613,11 @@ alter table t alter column col set not null; ### DML操作 -- update 时做 <> 判断,比如update table_a set column_b = c where column_b <> c,禁止出现where 1=1 +- update 时做 <> 判断,比如update table_a set column_b = c where column_b <> c,禁止出现where 1=1 - 单条DML语句操作数据量不超过10万 -- 建议清空表时,使用truncate,不建议使用delete +- 建议清空表时,使用truncate,不建议使用delete ### DQL操作 @@ -641,17 +639,17 @@ alter table t alter column col set not null; ### 事务操作 -- 事务中的sql逻辑尽可能的简单,让每个事务的粒度尽可能小,尽量lock少的资源,避免lock 、deadlock的产生,事务执行完及时提交 +- 事务中的sql逻辑尽可能的简单,让每个事务的粒度尽可能小,尽量lock少的资源,避免lock、deadlock的产生,事务执行完及时提交 -- 执行CRAETE、DROP、ALTER等DDL操作, 尤其多条,不要显式的开transaction, 因为加lock的mode非常高,极易产生deadlock +- 执行CRAETE、DROP、ALTER等DDL操作,尤其多条,不要显式的开transaction,因为加lock的mode非常高,极易产生deadlock -- state 为 idle in transaction 的连接,如果出现在Master, 会无谓的lock住相应的资源, 可导致后续产生lock,甚至deadlock; 出现在Slave,可导致卡住主从同步 +- state 为 idle in transaction 的连接,如果出现在Master,会无谓的lock住相应的资源,可导致后续产生lock,甚至deadlock; 出现在Slave,可导致卡住主从同步 ### 其他 -- 建议运行在SSD上的实例, random_page_cost (默认值为4) 设置为1.0~2.0之间, 使查询规划器更倾向于使用索引扫描 +- 建议运行在SSD上的实例,random_page_cost (默认值为4) 设置为1.0~2.0之间,使查询规划器更倾向于使用索引扫描 -- 建议在需要使用explain analyze 查看实际真正执行计划与时间时,如果是写入 query,强烈建议先开启事务, 然后回滚。 +- 建议在需要使用explain analyze查看实际真正执行计划与时间时,如果是写入query,强烈建议先开启事务,然后回滚。 - 对于频繁更新,膨胀率较高的表,应找窗口期执行表重组,降低高水位 @@ -724,7 +722,7 @@ create database dbnam DBCOMPATIBILITY='PG' encoding=’utf8’; #### json类型 -MogDB/openGauss只允许使用json 类型。 +MogDB/openGauss只允许使用json类型。 | 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | | ----- | ---------- | --------- | -------- | 
---- | diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md deleted file mode 100644 index 98743159..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: 约束设计 -summary: 约束设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 约束设计 - -## DEFAULT和NULL约束 - -- 【建议】如果能够从业务层面补全字段值,那么,就不建议使用DEFAULT约束,避免数据加载时产生不符合预期的结果。 -- 【建议】给明确不存在NULL值的字段加上NOT NULL约束,优化器会在特定场景下对其进行自动优化。 -- 【建议】给可以显式命名的约束显式命名。除了NOT NULL和DEFAULT约束外,其他约束都可以显式命名。 - -## 局部聚簇 - -Partial Cluster Key(局部聚簇,简称PCK)是列存表的一种局部聚簇技术,在MogDB中,使用PCK可以通过min/max稀疏索引实现事实表快速过滤扫描。PCK的选取遵循以下原则: - -- 【关注】一张表上只能建立一个PCK,一个PCK可以包含多列,但是一般不建议超过2列。 -- 【建议】在查询中的简单表达式过滤条件上创建PCK。这种过滤条件一般形如col op const,其中col为列名,op为操作符 =、>、>=、<=、<,const为常量值。 -- 【建议】在满足上面条件的前提下,选择distinct值比较多的列上建PCK。 - -## 唯一约束 - -- 【关注】行存表、列存表均支持唯一约束。 -- 【建议】从命名上明确标识唯一约束,例如,命名为“UNI+构成字段”。 - -## 主键约束 - -- 【关注】行存表、列存表均支持主键约束。 -- 【建议】从命名上明确标识主键约束,例如,将主键约束命名为 “PK+字段名”。 - -## 检查约束 - -- 【关注】行存表支持检查约束,而列存表不支持。 -- 【建议】从命名上明确标识检查约束,例如,将检查约束命名为 “CK+字段名”。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md deleted file mode 100644 index 1493db0a..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Database和Schema设计 -summary: Database和Schema设计 -author: Guo Huan -date: 2021-10-14 ---- - -# Database和Schema设计 - 
-MogDB中可以使用Database和Schema实现业务的隔离,区别在于Database的隔离更加彻底,各个Database之间共享资源极少,可实现连接隔离、权限隔离等,Database之间无法直接互访。Schema隔离的方式共用资源较多,可以通过grant与revoke语法便捷地控制不同用户对各Schema及其下属对象的权限。 - -- 从便捷性和资源共享效率上考虑,推荐使用Schema进行业务隔离。 -- 建议系统管理员创建Schema和Database,再赋予相关用户对应的权限。 - -## Database设计建议 - -- 【规则】在实际业务中,根据需要创建新的Database,不建议直接使用数据库实例默认的postgres数据库。 -- 【建议】一个数据库实例内,用户自定义的Database数量建议不超过3个。 -- 【建议】为了适应全球化的需求,使数据库编码能够存储与表示绝大多数的字符,建议创建Database的时候使用UTF-8编码。 -- 【关注】创建Database时,需要重点关注字符集编码(ENCODING)和兼容性(DBCOMPATIBILITY)两个配置项。MogDB支持A、B和PG三种兼容模式,分别表示兼容Oracle语法、MySQL语法和PostgreSQL语法,不同兼容模式下的语法行为存在一定差异,默认为A兼容模式。 -- 【关注】Database的owner默认拥有该Database下所有对象的所有权限,包括删除权限。删除权限影响较大,请谨慎使用。 - -## Schema设计建议 - -- 【关注】如果该用户不具有sysadmin权限或者不是该Schema的owner,要访问Schema下的对象,需要同时给用户赋予Schema的usage权限和对象的相应权限。 -- 【关注】如果要在Schema下创建对象,需要授予操作用户该Schema的create权限。 -- 【关注】Schema的owner默认拥有该Schema下对象的所有权限,包括删除权限。删除权限影响较大,请谨慎使用。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md deleted file mode 100644 index 55b455b1..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/field-design.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: 字段设计 -summary: 字段设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 字段设计 - -## 选择数据类型 - -在字段设计时,基于查询效率的考虑,一般遵循以下原则: - -- 【建议】尽量使用高效数据类型。 - - 选择数值类型时,在满足业务精度的情况下,选择数据类型的优先级从高到低依次为整数、浮点数、NUMERIC。 - -- 【建议】当多个表存在逻辑关系时,表示同一含义的字段应该使用相同的数据类型。 - -- 【建议】对于字符串数据,建议使用变长字符串数据类型,并指定最大长度。请务必确保指定的最大长度大于需要存储的最大字符数,避免超出最大长度时出现字符截断现象。除非明确知道数据类型为固定长度字符串,否则,不建议使用CHAR(n)、BPCHAR(n)、NCHAR(n)、CHARACTER(n)。 - - 关于字符串类型的详细说明,请参见下文。 - -## 常用字符串类型介绍 - -在进行字段设计时,需要根据数据特征选择相应的数据类型。字符串类型在使用时比较容易混淆,下表列出了MogDB中常见的字符串类型: - -**表 1** 常用字符串类型 - -| **名称** | **描述** | **最大存储空间** | -| :------------------- | :----------------------------------------------------------- | :--------------- | -| CHAR(n) | 
定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| CHARACTER(n) | 定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| NCHAR(n) | 定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| BPCHAR(n) | 定长字符串,n描述了存储的字节长度,如果输入的字符串字节格式小于n,那么后面会自动用空字符补齐至n个字节。 | 10MB | -| VARCHAR(n) | 变长字符串,n描述了可以存储的最大字节长度。 | 10MB | -| CHARACTER VARYING(n) | 变长字符串,n描述了可以存储的最大字节长度;此数据类型和VARCHAR(n)是同一数据类型的不同表达形式。 | 10MB | -| VARCHAR2(n) | 变长字符串,n描述了可以存储的最大字节长度,此数据类型是为兼容Oracle类型新增的,行为和VARCHAR(n)一致。 | 10MB | -| NVARCHAR2(n) | 变长字符串,n描述了可以存储的最大字节长度。 | 10MB | -| TEXT | 不限长度(不超过1GB-8203字节)变长字符串。 | 1GB-8203字节 | diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md deleted file mode 100644 index 9dcd2ba1..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/table-design.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: 表设计 -summary: 表设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 表设计 - -MogDB是分布式架构。数据分布在各个DN上。总体上讲,良好的表设计需要遵循以下原则: - -- 【关注】将表数据均匀分布在各个DN上。数据均匀分布,可以防止数据在部分DN上集中分布,从而导致因存储倾斜造成数据库实例有效容量下降。通过选择合适的分布列,可以避免数据倾斜。 -- 【关注】将表的扫描压力均匀分散在各个DN上。避免扫描压力集中在部分DN上,而导致性能瓶颈。例如,在事实表上使用等值过滤条件时,将会导致扫描压力不均匀。 -- 【关注】减少需要扫描的数据量。通过分区表的剪枝机制可以大幅减少数据的扫描量。 -- 【关注】尽量减少随机I/O。通过聚簇/局部聚簇可以实现热数据的连续存储,将随机I/O转换为连续I/O,从而减少扫描的I/O代价。 -- 【关注】尽量避免数据shuffle。shuffle,是指在物理上,数据从一个节点,传输到另一个节点。shuffle占用了大量宝贵的网络资源,减小不必要的数据shuffle,可以减少网络压力,使数据的处理本地化,提高数据库实例的性能和可支持的并发度。通过对关联条件和分组条件的仔细设计,能够尽可能地减少不必要的数据shuffle。 - -## 选择存储方案 - -【建议】表的存储类型是表定义设计的第一步,客户业务类型是决定表的存储类型的主要因素,表存储类型的选择依据请参考表1。 - -**表 1** 表的存储类型及场景 - -| 存储类型 | 适用场景 | -| :------- | :----------------------------------------------------------- | -| 行存 | - 点查询(返回记录少,基于索引的简单查询)。
- 增、删、改操作较多的场景。 | -| 列存 | - 统计分析类查询(关联、分组操作较多的场景)。
- 即席查询(查询条件不确定,行存表扫描难以使用索引)。 | - -## 选择分布方案 - -【建议】表的分布方式的选择一般遵循以下原则: - -**表 2** 表的分布方式及使用场景 - -| 分布方式 | 描述 | 适用场景 | -| :---------- | :----------------------------------------------- | :----------------------------- | -| Hash | 表数据通过Hash方式散列到数据库实例中的所有DN上。 | 数据量较大的事实表。 | -| Replication | 数据库实例中每一个DN都有一份全量表数据。 | 维度表、数据量较小的事实表。 | -| Range | 表数据对指定列按照范围进行映射,分布到对应DN。 | 用户需要自定义分布规则的场景。 | -| List | 表数据对指定列按照具体值进行映射,分布到对应DN。 | 用户需要自定义分布规则的场景。 | - -## 选择分区方案 - -当表中的数据量很大时,应当对表进行分区,一般需要遵循以下原则: - -- 【建议】使用具有明显区间性的字段进行分区,比如日期、区域等字段上建立分区。 -- 【建议】分区名称应当体现分区的数据特征。例如,关键字+区间特征。 -- 【建议】将分区上边界的分区值定义为MAXVALUE,以防止可能出现的数据溢出。 - -典型的分区表定义如下: - -```sql -CREATE TABLE staffS_p1 -( - staff_ID NUMBER(6) not null, - FIRST_NAME VARCHAR2(20), - LAST_NAME VARCHAR2(25), - EMAIL VARCHAR2(25), - PHONE_NUMBER VARCHAR2(20), - HIRE_DATE DATE, - employment_ID VARCHAR2(10), - SALARY NUMBER(8,2), - COMMISSION_PCT NUMBER(4,2), - MANAGER_ID NUMBER(6), - section_ID NUMBER(4) -) -PARTITION BY RANGE (HIRE_DATE) -( - PARTITION HIRE_19950501 VALUES LESS THAN ('1995-05-01 00:00:00'), - PARTITION HIRE_19950502 VALUES LESS THAN ('1995-05-02 00:00:00'), - PARTITION HIRE_maxvalue VALUES LESS THAN (MAXVALUE) -); -``` - -## 选择分布键 - -Hash表的分布键选取至关重要,如果分布键选择不当,可能会导致数据倾斜,从而导致查询时,I/O负载集中在部分DN上,影响整体查询性能。因此,在确定Hash表的分布策略之后,需要对表数据进行倾斜性检查,以确保数据的均匀分布。分布键的选择一般需要遵循以下原则: - -- 【建议】选作分布键的字段取值应该比较离散,以便数据能在各个DN上均匀分布。当单个字段无法满足离散条件时,可以考虑使用多个字段一起作为分布键。一般情况下,可以考虑选择表的主键作为分布键。例如,在人员信息表中选择证件号码作为分布键。 -- 【建议】在满足第一条原则的情况下,尽量不要选取在查询中存在常量过滤条件的字段作为分布键。例如,在表dwcjk相关的查询中,字段zqdh存在常量过滤条件“zqdh='000001'”,那么就应当尽量不选择zqdh字段做为分布键。 -- 【建议】在满足前两条原则的情况,尽量选择查询中的关联条件为分布键。当关联条件作为分布键时,join任务的相关数据都分布在DN本地,将极大减少DN之间的数据流动代价。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md deleted file mode 100644 index b6d071ac..00000000 --- 
a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: 视图和关联表设计 -summary: 视图和关联表设计 -author: Guo Huan -date: 2021-10-14 ---- - -# 视图和关联表设计 - -## 视图设计 - -- 【建议】除非视图之间存在强依赖关系,否则不建议视图嵌套。 -- 【建议】视图定义中尽量避免排序操作。 - -## 关联表设计 - -- 【建议】表之间的关联字段应该尽量少。 -- 【建议】关联字段的数据类型应该保持一致。 -- 【建议】关联字段在命名上,应该可以明显体现出关联关系。例如,采用同样名称来命名。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md deleted file mode 100644 index 797973d2..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/database-object-naming-conventions.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: 数据库对象命名 -summary: 数据库对象命名 -author: Guo Huan -date: 2021-10-14 ---- - -# 数据库对象命名 - -数据库对象命名需要满足约束:非时序表长度不超过63个字符,时序表长度不超过53个字符,以字母或下划线开头,中间字符可以是字母、数字、下划线、$、#。 - -- 【建议】避免使用保留或者非保留关键字命名数据库对象。 - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **说明:** 可以使用select * from pg_get_keywords()查询MogDB的关键字,或者在[关键字](2-keywords)章节中查看。 - -- 【建议】避免使用双引号括起来的字符串来定义数据库对象名称,除非需要限制数据库对象名称的大小写。数据库对象名称大小写敏感会使定位问题难度增加。 - -- 【建议】数据库对象命名风格务必保持统一。 - - - 增量开发的业务系统或进行业务迁移的系统,建议遵守历史的命名风格。 - - 建议使用多个单词组成,以下划线分割。 - - 数据库对象名称建议能够望文知意,尽量避免使用自定义缩写(可以使用通用的术语缩写进行命名)。例如,在命名中可以使用具有实际业务含义的英文词汇或汉语拼音,但规则应该在数据库实例范围内保持一致。 - - 变量名的关键是要具有描述性,即变量名称要有一定的意义,变量名要有前缀标明该变量的类型。 - -- 【建议】表对象的命名应该可以表征该表的重要特征。例如,在表对象命名时区分该表是普通表、临时表还是非日志表: - - - 普通表名按照数据集的业务含义命名。 - - 临时表以“tmp_+后缀”命名。 - - 非日志表以“ul_+后缀”命名。 - - 外表以“f_+后缀”命名。 - - 不创建以redis_为前缀的数据库对象。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md deleted file mode 100644 index 
782d0075..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: 开发设计建议概述 -summary: 开发设计建议概述 -author: Guo Huan -date: 2021-10-14 ---- - -# 开发设计建议概述 - -本开发设计建议约定数据库建模和数据库应用程序开发过程中,应当遵守的设计规范。依据这些规范进行建模,能够更好的契合MogDB的分布式处理架构,输出更高效的业务SQL代码。 - -本开发设计建议中所陈述的“建议”和“关注”含义如下: - -- **建议**:用户应当遵守的设计规则。遵守这些规则,能够保证业务的高效运行;违反这些规则,将导致业务性能的大幅下降或某些业务逻辑错误。 -- **关注**:在业务开发过程中客户需要注意的细则。用于标识容易导致客户理解错误的知识点(实际上遵守SQL标准的SQL行为),或者程序中潜在的客户不易感知的默认行为。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md deleted file mode 100644 index 046ede24..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/sql-compilation.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: SQL编写 -summary: SQL编写 -author: Guo Huan -date: 2021-10-14 ---- - -# SQL编写 - -## DDL - -- 【建议】在MogDB中,建议DDL(建表、comments等)操作统一执行,在批处理作业中尽量避免DDL操作。避免大量并发事务对性能的影响。 -- 【建议】在非日志表(unlogged table)使用完后,立即执行数据清理(truncate)操作。因为在异常场景下,MogDB不保证非日志表(unlogged table)数据的安全性。 -- 【建议】临时表和非日志表的存储方式建议和基表相同。当基表为行存(列存)表时,临时表和非日志表也推荐创建为行存(列存)表,可以避免行列混合关联带来的高计算代价。 -- 【建议】索引字段的总长度不超过50字节。否则,索引大小会膨胀比较严重,带来较大的存储开销,同时索引性能也会下降。 -- 【建议】不要使用DROP…CASCADE方式删除对象,除非已经明确对象间的依赖关系,以免误删。 - -## 数据加载和卸载 - -- 【建议】在insert语句中显式给出插入的字段列表。例如: - - ```sql - INSERT INTO task(name,id,comment) VALUES ('task1','100','第100个任务'); - ``` - -- 【建议】在批量数据入库之后,或者数据增量达到一定阈值后,建议对表进行analyze操作,防止统计信息不准确而导致的执行计划劣化。 - -- 【建议】如果要清理表中的所有数据,建议使用truncate table方式,不要使用delete table方式。delete table方式删除性能差,且不会释放那些已经删除了的数据占用的磁盘空间。 - -## 类型转换 - -- 【建议】在需要数据类型转换(不同数据类型进行比较或转换)时,使用强制类型转换,以防隐式类型转换结果与预期不符。 -- 【建议】在查询中,对常量要显式指定数据类型,不要试图依赖任何隐式的数据类型转换。 -- 【关注】若sql_compatibility参数设置为A,在导入数据时,空字符串会自动转化为NULL。如果需要保留空字符串需要sql_compatibility参数设置为C。 - -## 查询操作 - -- 【建议】除ETL程序外,应该尽量避免向客户端返回大量结果集的操作。如果结果集过大,应考虑业务设计是否合理。 - -- 
【建议】使用事务方式执行DDL和DML操作。例如,truncate table、update table、delete table、drop table等操作,一旦执行提交就无法恢复。对于这类操作,建议使用事务进行封装,必要时可以进行回滚。 - -- 【建议】在查询编写时,建议明确列出查询涉及的所有字段,不建议使用“SELECT *”这种写法。一方面基于性能考虑,尽量减少查询输出列;另一方面避免增删字段对前端业务兼容性的影响。 - -- 【建议】在访问表对象时带上schema前缀,可以避免因schema切换导致访问到非预期的表。 - -- 【建议】超过3张表或视图进行关联(特别是full join)时,执行代价难以估算。建议使用WITH TABLE AS语句创建中间临时表的方式增加SQL语句的可读性。 - -- 【建议】尽量避免使用笛卡尔积和Full join。这些操作会造成结果集的急剧膨胀,同时其执行性能也很低。 - -- 【关注】NULL值的比较只能使用IS NULL或者IS NOT NULL的方式判断,其他任何形式的逻辑判断都返回NULL。例如:NULL<>NULL、NULL=NULL和NULL<>1返回结果都是NULL,而不是期望的布尔值。 - -- 【关注】需要统计表中所有记录数时,不要使用count(col)来替代count(*)。count(*)会统计NULL值(真实行数),而count(col)不会统计。 - -- 【关注】在执行count(col)时,将“值为NULL”的记录行计数为0。在执行sum(col)时,当所有记录都为NULL时,最终将返回NULL;当不全为NULL时,“值为NULL”的记录行将被计数为0。 - -- 【关注】count(多个字段)时,多个字段名必须用圆括号括起来。例如,count( (col1,col2,col3) )。注意:通过多字段统计行数时,即使所选字段都为NULL,该行也被计数,效果与count(*)一致。 - -- 【关注】count(distinct col)用来计算该列不重复的非NULL的数量,NULL将不被计数。 - -- 【关注】count(distinct (col1,col2,…))用来统计多列的唯一值数量,当所有统计字段都为NULL时,也会被计数,同时这些记录被认为是相同的。 - -- 【建议】使用连接操作符“||”替换concat函数进行字符串连接。因为concat函数生成的执行计划不能下推,导致查询性能严重劣化。 - -- 【建议】使用下面时间相关的宏替换now函数来获取当前时间。因为now函数生成的执行计划无法下推,导致查询性能严重劣化。 - - **表 1** 时间相关的宏 - - | **宏名称** | **描述** | **示例** | - | :------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | - | CURRENT_DATE | 获取当前日期,不包含时分秒。 | `mogdb=# select CURRENT_DATE; date ----- 2018-02-02 (1 row)` | - | CURRENT_TIME | 获取当前时间,不包含年月日。 | `mogdb=# select CURRENT_TIME; timetz -------- 00:39:34.633938+08 (1 row)` | - | CURRENT_TIMESTAMP(n) | 获取当前日期和时间,包含年月日时分秒。
说明:
n表示存储的毫秒位数。 | `mogdb=# select CURRENT_TIMESTAMP(6); timestamptz ----------- 2018-02-02 00:39:55.231689+08 (1 row)` | - -- 【建议】尽量避免标量子查询语句的出现。标量子查询是出现在select语句输出列表中的子查询,在下面例子中,下划线部分即为一个标量子查询语句: - - ```sql - SELECT id, (SELECT COUNT(*) FROM films f WHERE f.did = s.id) FROM staffs_p1 s; - ``` - - 标量子查询往往会导致查询性能急剧劣化,在应用开发过程中,应当根据业务逻辑,对标量子查询进行等价转换,将其写为表关联。 - -- 【建议】在where子句中,应当对过滤条件进行排序,把选择读较小(筛选出的记录数较少)的条件排在前面。 - -- 【建议】where子句中的过滤条件,尽量符合单边规则。即把字段名放在比较条件的一边,优化器在某些场景下会自动进行剪枝优化。形如col op expression,其中col为表的一个列,op为‘=’、‘>’的等比较操作符,expression为不含列名的表达式。例如, - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data WHERE current_timestamp(6) - time < '1 days'::interval; - ``` - - 改写为: - - ```sql - SELECT id, from_image_id, from_person_id, from_video_id FROM face_data where time > current_timestamp(6) - '1 days'::interval; - ``` - -- 【建议】尽量避免不必要的排序操作。排序需要耗费大量的内存及CPU,如果业务逻辑许可,可以组合使用order by和limit,减小资源开销。MogDB默认按照ASC & NULL LAST进行排序。 - -- 【建议】使用ORDER BY子句进行排序时,显式指定排序方式(ASC/DESC),NULL的排序方式(NULL FIRST/NULL LAST)。 - -- 【建议】不要单独依赖limit子句返回特定顺序的结果集。如果部分特定结果集,可以将ORDER BY子句与Limit子句组合使用,必要时也可以使用offset跳过特定结果。 - -- 【建议】在保障业务逻辑准确的情况下,建议尽量使用UNION ALL来代替UNION。 - -- 【建议】如果过滤条件只有OR表达式,可以将OR表达式转化为UNION ALL以提升性能。使用OR的SQL语句经常无法优化,导致执行速度慢。例如,将下面语句 - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) OR (cdp= 301 AND inline=302) OR (cdp= 302 ANDinline=301); - ``` - - 转换为: - - ```sql - SELECT * FROM scdc.pub_menu - WHERE (cdp= 300 AND inline=301) - union all - SELECT * FROM scdc.pub_menu - WHERE (cdp= 301 AND inline=302) - union all - SELECT * FROM tablename - WHERE (cdp= 302 AND inline=301) - ``` - -- 【建议】当in(val1, val2, val3…)表达式中字段较多时,建议使用in (values (va11), (val2),(val3)…)语句进行替换。优化器会自动把in约束转换为非关联子查询,从而提升查询性能。 - -- 【建议】在关联字段不存在NULL值的情况下,使用(not) exist代替(not) in。例如,在下面查询语句中,当T1.C1列不存在NULL值时,可以先为T1.C1字段添加NOT NULL约束,再进行如下改写。 - - ```sql - SELECT * FROM T1 WHERE T1.C1 NOT IN (SELECT T2.C2 FROM T2); - ``` - - 可以改写为: - - ```sql - SELECT * FROM T1 WHERE NOT 
EXISTS (SELECT * FROM T1,T2 WHERE T1.C1=T2.C2); - ``` - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **说明:** - > - > - 如果不能保证T1.C1列的值为NOT NULL的情况下,就不能进行上述改写。 - > - 如果T1.C1为子查询的输出,要根据业务逻辑确认其输出是否为NOT NULL。 - -- 【建议】通过游标进行翻页查询,而不是使用LIMIT OFFSET语法,避免多次执行带来的资源开销。游标必须在事务中使用,执行完后务必关闭游标并提交事务。 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md b/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md deleted file mode 100644 index 7ef8ae23..00000000 --- a/product/zh/docs-mogdb/v3.0/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: JDBC配置 -summary: JDBC配置 -author: Guo Huan -date: 2021-10-14 ---- - -# JDBC配置 - -目前,MogDB相关的第三方工具都是通过JDBC进行连接的,此部分将介绍工具配置时的注意事项。 - -## 连接参数 - -- 【关注】第三方工具通过JDBC连接MogDB时,JDBC向MogDB发起连接请求,会默认添加以下配置参数,详见JDBC代码ConnectionFactoryImpl类的实现。 - - ``` - params = { - { "user", user }, - { "database", database }, - { "client_encoding", "UTF8" }, - { "DateStyle", "ISO" }, - { "extra_float_digits", "2" }, - { "TimeZone", createPostgresTimeZone() }, - }; - ``` - - 这些参数可能会导致JDBC客户端的行为与gsql客户端的行为不一致,例如,Date数据显示方式、浮点数精度表示、timezone显示。 - - 如果实际期望和这些配置不符,建议在java连接设置代码中显式设定这些参数。 - -- 【建议】通过JDBC连接数据库时,应该保证下面三个时区设置一致: - - - JDBC客户端所在主机的时区。 - - - MogDB数据库实例所在主机的时区。 - - - MogDB数据库实例配置过程中时区。 - - > ![img](https://cdn-mogdb.enmotech.com/docs-media/icon/icon-note.gif) **说明:** - > 时区设置相关的操作,请参考《安装指南》中“[设置时区和时间](3-modifying-os-configuration#设置时区和时间)“部分内容。 - -## fetchsize - -【关注】在应用程序中,如果需要使用fetchsize,必须关闭autocommit。开启autocommit,会令fetchsize配置失效。 - -## autocommit - -【建议】在JDBC向MogDB申请连接的代码中,建议显式打开autocommit开关。如果基于性能或者其它方面考虑,需要关闭autocommit时,需要应用程序自己来保证事务的提交。例如,在指定的业务SQL执行完之后做显式提交,特别是客户端退出之前务必保证所有的事务已经提交。 - -## 释放连接 - -【建议】推荐使用连接池限制应用程序的连接数。每执行一条SQL就连接一次数据库,是一种不好SQL的编写习惯。 - -【建议】在应用程序完成作业任务之后,应当及时断开和MogDB的连接,释放资源。建议在任务中设置session超时时间参数。 
- -【建议】使用JDBC连接池,在将连接释放给连接池前,需要执行以下操作,重置会话环境。否则,可能会因为历史会话信息导致的对象冲突。 - -- 如果在连接中设置了GUC参数,那么在将连接归还连接池之前,必须使用“SET SESSION AUTHORIZATION DEFAULT;RESET ALL;”将连接的状态清空。 -- 如果使用了临时表,那么在将连接归还连接池之前,必须将临时表删除。 - -## CopyManager - -【建议】在不使用ETL工具,数据入库实时性要求又比较高的情况下,建议在开发应用程序时,使用MogDB JDBC驱动的copyManger接口进行微批导入。 diff --git a/product/zh/docs-mogdb/v3.0/installation-guide/manual-installation.md b/product/zh/docs-mogdb/v3.0/installation-guide/manual-installation.md index 29883c9a..0957ba77 100644 --- a/product/zh/docs-mogdb/v3.0/installation-guide/manual-installation.md +++ b/product/zh/docs-mogdb/v3.0/installation-guide/manual-installation.md @@ -31,7 +31,8 @@ date: 2021-10-14 useradd omm -g 2000 -u 2000 echo "Enmo@123" | passwd --stdin omm mkdir -p /opt/mogdb/software - chown -R omm:dbgrp /opt/software/mogdb + mkdir -p /opt/mogdb/data + chown -R omm:dbgrp /opt/mogdb ``` 4. 上传并解压二进制文件 diff --git a/product/zh/docs-mogdb/v3.0/toc.md b/product/zh/docs-mogdb/v3.0/toc.md index aa039c44..9137954c 100644 --- a/product/zh/docs-mogdb/v3.0/toc.md +++ b/product/zh/docs-mogdb/v3.0/toc.md @@ -192,18 +192,6 @@ + [WDR解读指南](/performance-tuning/wdr-snapshot-schema.md) + [TPCC性能优化指南](/performance-tuning/TPCC-performance-tuning-guide.md) + 开发者指南 - + 开发设计建议 - + [概述](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [数据库对象命名](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + 数据库对象设计 - + [Database和Schema设计](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [表设计](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [字段设计](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [约束设计](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + 
[视图和关联表设计](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + 工具对接 - + [JDBC配置](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL编写](/developer-guide/development-and-design-proposal/sql-compilation.md) + 应用程序开发教程 + [开发规范](/developer-guide/dev/1-development-specifications.md) + 基于JDBC开发 diff --git a/product/zh/docs-mogdb/v3.0/toc_dev.md b/product/zh/docs-mogdb/v3.0/toc_dev.md index dc64c332..72e00260 100644 --- a/product/zh/docs-mogdb/v3.0/toc_dev.md +++ b/product/zh/docs-mogdb/v3.0/toc_dev.md @@ -4,18 +4,6 @@ ## 开发者指南 -+ 开发设计建议 - + [概述](/developer-guide/development-and-design-proposal/overview-of-development-and-design-proposal.md) - + [数据库对象命名](/developer-guide/development-and-design-proposal/database-object-naming-conventions.md) - + 数据库对象设计 - + [Database和Schema设计](/developer-guide/development-and-design-proposal/database-object-design/database-and-schema-design.md) - + [表设计](/developer-guide/development-and-design-proposal/database-object-design/table-design.md) - + [字段设计](/developer-guide/development-and-design-proposal/database-object-design/field-design.md) - + [约束设计](/developer-guide/development-and-design-proposal/database-object-design/constraint-design.md) - + [视图和关联表设计](/developer-guide/development-and-design-proposal/database-object-design/view-and-joined-table-design.md) - + 工具对接 - + [JDBC配置](/developer-guide/development-and-design-proposal/tool-interconnection/jdbc-configuration.md) - + [SQL编写](/developer-guide/development-and-design-proposal/sql-compilation.md) + 应用程序开发教程 + [开发规范](/developer-guide/dev/1-development-specifications.md) + 基于JDBC开发 -- Gitee From d01635a47de3ab276b79a2e2acbd38ae05308f39 Mon Sep 17 00:00:00 2001 From: spaceoddity91719 Date: Tue, 19 Apr 2022 17:17:12 +0800 Subject: [PATCH 2/6] fix markdown --- .../v3.0/about-mogdb/mogdb-release-notes.md | 32 +++++++++---------- .../v3.0/about-mogdb/mogdb-release-notes.md | 32 
+++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md b/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md index b883c497..29239b73 100644 --- a/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md +++ b/product/en/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md @@ -112,22 +112,22 @@ MogDB version 3.0 is further enhanced based on MogDB version 2.1 and incorporate ### 3.1 Incorporate openGauss 3.0.0 Modified Defects -- [I4VUXG](https://gitee.com/opengauss/openGauss-server/issues/I4VUXG?from=project-issue): Fixed the data loss issue of unlogged tables. -- [I4SF5P](https://gitee.com/opengauss/openGauss-server/issues/I4SF5P?from=project-issue): Fixed the core dump issue occurred by running create extension dblink after the database is compiled and installed in the release version, and the dblink module is compiled and installed. -- [I4S74D](https://gitee.com/opengauss/openGauss-server/issues/I4S74D?from=project-issue): Fixed the issue of failing to insert data (5⁄5) into a row-store compressed table using Jmeter when the data volume is greater than 1 GB. The compression type is set to **2**. -- [I4N81J](https://gitee.com/opengauss/openGauss-server/issues/I4N81J?from=project-issue): Fixed the issue of failing to synchronize the UPDATE and DELETE operations to subscribers. -- [I4YPJQ](https://gitee.com/opengauss/openGauss-server/issues/I4YPJQ?from=project-issue): Fixed the issue of failing to insert varchar constants into MOTs using JDBC. -- [I4PF6G](https://gitee.com/opengauss/openGauss-server/issues/I4PF6G?from=project-issue): Fixed the issue of TPC-C execution failure during foreign key lock enhancement and gray upgrade from 2.0.0. to 2.2.0 (not committed). 
-- [I4WPD1](https://gitee.com/opengauss/openGauss-server/issues/I4WPD1?from=project-issue): Fixed the issue of failing to execute simplified installation because the **openGauss-2.1.0-CentOS-64bit.tar.bz2** file is missing in the decompressed installation package. -- [I4L268](https://gitee.com/opengauss/openGauss-server/issues/I4L268?from=project-issue): Fixed the issue of incorrect system catalog **pg_partition** after the partitioned table is truncated for multiple times and then the **vacuum freeze pg_partition** command is executed. -- [I3HZJN](https://gitee.com/opengauss/openGauss-server/issues/I3HZJN?from=project-issue): Fixed the issue of incorrect date format when the **copy** command is executed. -- [I4HUXD](https://gitee.com/opengauss/openGauss-server/issues/I4HUXD?from=project-issue): Fixed the issue of failing to query the JSONB type. -- [I4QDN9](https://gitee.com/opengauss/openGauss-server/issues/I4QDN9?from=project-issue): Fixed the issue of returning a value for **select 1.79E +308\*2,cume_dist() over(order by 1.0E128\*1.2)** out of range. -- [I4PAVO](https://gitee.com/opengauss/openGauss-server/issues/I4PAVO?from=project-issue): Fixed the issue of failing to identify the **start with connect by record** subquery. -- [I4UY9A](https://gitee.com/opengauss/openGauss-server/issues/I4UY9A?from=project-issue): Fixed the issue of failing to create the default partition during list partitioning. -- [I4W3UB](https://gitee.com/opengauss/openGauss-server/issues/I4W3UB?from=project-issue): Fixed the issue of failing to obtain the view definition when the view is created using a user-defined type and the user-defined type is renamed. -- [I4WRMX](https://gitee.com/opengauss/openGauss-server/issues/I4WRMX?from=project-issue): Fixed the issue of failing to clear data in the **statement_history** table. When the database restarts and the **enable_stmt_track** parameter is disabled, no record should be found in the **statement_history** table. 
-- [I4WOBH](https://gitee.com/opengauss/openGauss-server/issues/I4WOBH?from=project-issue): Fixed the issue of failing to restart the database by setting GUC parameter **pagewriter_sleep** from **360000** to **2000**. +- [I4VUXG](https://gitee.com/opengauss/openGauss-server/issues/I4VUXG?from=project-issue): Fixed the data loss issue of unlogged tables. +- [I4SF5P](https://gitee.com/opengauss/openGauss-server/issues/I4SF5P?from=project-issue): Fixed the core dump issue occurred by running create extension dblink after the database is compiled and installed in the release version, and the dblink module is compiled and installed. +- [I4S74D](https://gitee.com/opengauss/openGauss-server/issues/I4S74D?from=project-issue): Fixed the issue of failing to insert data (5⁄5) into a row-store compressed table using Jmeter when the data volume is greater than 1 GB. The compression type is set to **2**. +- [I4N81J](https://gitee.com/opengauss/openGauss-server/issues/I4N81J?from=project-issue): Fixed the issue of failing to synchronize the UPDATE and DELETE operations to subscribers. +- [I4YPJQ](https://gitee.com/opengauss/openGauss-server/issues/I4YPJQ?from=project-issue): Fixed the issue of failing to insert varchar constants into MOTs using JDBC. +- [I4PF6G](https://gitee.com/opengauss/openGauss-server/issues/I4PF6G?from=project-issue): Fixed the issue of TPC-C execution failure during foreign key lock enhancement and gray upgrade from 2.0.0. to 2.2.0 (not committed). +- [I4WPD1](https://gitee.com/opengauss/openGauss-server/issues/I4WPD1?from=project-issue): Fixed the issue of failing to execute simplified installation because the **openGauss-2.1.0-CentOS-64bit.tar.bz2** file is missing in the decompressed installation package. 
+- [I4L268](https://gitee.com/opengauss/openGauss-server/issues/I4L268?from=project-issue): Fixed the issue of incorrect system catalog **pg_partition** after the partitioned table is truncated for multiple times and then the **vacuum freeze pg_partition** command is executed. +- [I3HZJN](https://gitee.com/opengauss/openGauss-server/issues/I3HZJN?from=project-issue): Fixed the issue of incorrect date format when the **copy** command is executed. +- [I4HUXD](https://gitee.com/opengauss/openGauss-server/issues/I4HUXD?from=project-issue): Fixed the issue of failing to query the JSONB type. +- [I4QDN9](https://gitee.com/opengauss/openGauss-server/issues/I4QDN9?from=project-issue): Fixed the issue of returning a value for **select 1.79E +308\*2,cume_dist() over(order by 1.0E128\*1.2)** out of range. +- [I4PAVO](https://gitee.com/opengauss/openGauss-server/issues/I4PAVO?from=project-issue): Fixed the issue of failing to identify the **start with connect by record** subquery. +- [I4UY9A](https://gitee.com/opengauss/openGauss-server/issues/I4UY9A?from=project-issue): Fixed the issue of failing to create the default partition during list partitioning. +- [I4W3UB](https://gitee.com/opengauss/openGauss-server/issues/I4W3UB?from=project-issue): Fixed the issue of failing to obtain the view definition when the view is created using a user-defined type and the user-defined type is renamed. +- [I4WRMX](https://gitee.com/opengauss/openGauss-server/issues/I4WRMX?from=project-issue): Fixed the issue of failing to clear data in the **statement_history** table. When the database restarts and the **enable_stmt_track** parameter is disabled, no record should be found in the **statement_history** table. +- [I4WOBH](https://gitee.com/opengauss/openGauss-server/issues/I4WOBH?from=project-issue): Fixed the issue of failing to restart the database by setting GUC parameter **pagewriter_sleep** from **360000** to **2000**.
diff --git a/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md b/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md index d2b89ecd..1c5dd6f2 100644 --- a/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md +++ b/product/zh/docs-mogdb/v3.0/about-mogdb/mogdb-release-notes.md @@ -112,22 +112,22 @@ MogDB 3.0版本基于MogDB 2.1版本进一步增强,并合入了openGauss 3.0. ### 3.1 集成openGauss 3.0.0版本修复缺陷 -- [I4VUXG](https://gitee.com/opengauss/openGauss-server/issues/I4VUXG?from=project-issue) 修复unlogged table 数据丢失问题 -- [I4SF5P](https://gitee.com/opengauss/openGauss-server/issues/I4SF5P?from=project-issue) release版本编译安装数据库,且dblink模块编译安装后,create extension dblink导致数据库core -- [I4S74D](https://gitee.com/opengauss/openGauss-server/issues/I4S74D?from=project-issue) 使用Jmeter工具向行存压缩表插入数据,数据量1G以上时必现失败(5/5),compresstype=2 -- [I4N81J](https://gitee.com/opengauss/openGauss-server/issues/I4N81J?from=project-issue) update/delete操作无法同步到订阅端 -- [I4YPJQ](https://gitee.com/opengauss/openGauss-server/issues/I4YPJQ?from=project-issue) Inserting varchar constant into MOT table using JDBC fails -- [I4PF6G](https://gitee.com/opengauss/openGauss-server/issues/I4PF6G?from=project-issue) 外键锁增强-2.0.0.灰度升级至2.2.0不提交,执行tpcc失败 -- [I4WPD1](https://gitee.com/opengauss/openGauss-server/issues/I4WPD1?from=project-issue) 简化安装模块获取安装包后解压openGauss-2.1.0-CentOS-64bit.tar.bz2缺少simpleinstall目录 无法执行极简安装 -- [I4L268](https://gitee.com/opengauss/openGauss-server/issues/I4L268?from=project-issue) 分区表多次truncate后,再进行vacuum freeze pg\_partition,系统表pg\_partition索引不准确 -- [I3HZJN](https://gitee.com/opengauss/openGauss-server/issues/I3HZJN?from=project-issue) copy命令DATE\_FORMAT缺少时分秒时,未按格式复制 -- [I4HUXD](https://gitee.com/opengauss/openGauss-server/issues/I4HUXD?from=project-issue) jsonb类型查询报错 -- [I4QDN9](https://gitee.com/opengauss/openGauss-server/issues/I4QDN9?from=project-issue) select 1.79E +308\*2,cume\_dist\(\) over\(order by 1.0E128\*1.2\)返回超出范围 -- 
[I4PAVO](https://gitee.com/opengauss/openGauss-server/issues/I4PAVO?from=project-issue) start with connect by record子查询识别失败 -- [I4UY9A](https://gitee.com/opengauss/openGauss-server/issues/I4UY9A?from=project-issue) opengauss列表分区创建default分区失败 -- [I4W3UB](https://gitee.com/opengauss/openGauss-server/issues/I4W3UB?from=project-issue) 创建并使用自定义类型创建视图,重命名该自定义类型后,无法获取视图定义 -- [I4WRMX](https://gitee.com/opengauss/openGauss-server/issues/I4WRMX?from=project-issue) 重启数据库且enable\_stmt\_track参数关闭时,查询statement\_history表记录应该无记录,实际有记录,statement\_history表的数据未清空 -- [I4WOBH](https://gitee.com/opengauss/openGauss-server/issues/I4WOBH?from=project-issue) GUC设置pagewriter\_sleep为360000后恢复默认值2000,重启库失败 +- [I4VUXG](https://gitee.com/opengauss/openGauss-server/issues/I4VUXG?from=project-issue) 修复unlogged table 数据丢失问题 +- [I4SF5P](https://gitee.com/opengauss/openGauss-server/issues/I4SF5P?from=project-issue) release版本编译安装数据库,且dblink模块编译安装后,create extension dblink导致数据库core +- [I4S74D](https://gitee.com/opengauss/openGauss-server/issues/I4S74D?from=project-issue) 使用Jmeter工具向行存压缩表插入数据,数据量1G以上时必现失败(5/5),compresstype=2 +- [I4N81J](https://gitee.com/opengauss/openGauss-server/issues/I4N81J?from=project-issue) update/delete操作无法同步到订阅端 +- [I4YPJQ](https://gitee.com/opengauss/openGauss-server/issues/I4YPJQ?from=project-issue) Inserting varchar constant into MOT table using JDBC fails +- [I4PF6G](https://gitee.com/opengauss/openGauss-server/issues/I4PF6G?from=project-issue) 外键锁增强-2.0.0.灰度升级至2.2.0不提交,执行tpcc失败 +- [I4WPD1](https://gitee.com/opengauss/openGauss-server/issues/I4WPD1?from=project-issue) 简化安装模块获取安装包后解压openGauss-2.1.0-CentOS-64bit.tar.bz2缺少simpleinstall目录 无法执行极简安装 +- [I4L268](https://gitee.com/opengauss/openGauss-server/issues/I4L268?from=project-issue) 分区表多次truncate后,再进行vacuum freeze pg\_partition,系统表pg\_partition索引不准确 +- [I3HZJN](https://gitee.com/opengauss/openGauss-server/issues/I3HZJN?from=project-issue) copy命令DATE\_FORMAT缺少时分秒时,未按格式复制 +- 
[I4HUXD](https://gitee.com/opengauss/openGauss-server/issues/I4HUXD?from=project-issue) jsonb类型查询报错 +- [I4QDN9](https://gitee.com/opengauss/openGauss-server/issues/I4QDN9?from=project-issue) select 1.79E +308\*2,cume\_dist\(\) over\(order by 1.0E128\*1.2\)返回超出范围 +- [I4PAVO](https://gitee.com/opengauss/openGauss-server/issues/I4PAVO?from=project-issue) start with connect by record子查询识别失败 +- [I4UY9A](https://gitee.com/opengauss/openGauss-server/issues/I4UY9A?from=project-issue) opengauss列表分区创建default分区失败 +- [I4W3UB](https://gitee.com/opengauss/openGauss-server/issues/I4W3UB?from=project-issue) 创建并使用自定义类型创建视图,重命名该自定义类型后,无法获取视图定义 +- [I4WRMX](https://gitee.com/opengauss/openGauss-server/issues/I4WRMX?from=project-issue) 重启数据库且enable\_stmt\_track参数关闭时,查询statement\_history表记录应该无记录,实际有记录,statement\_history表的数据未清空 +- [I4WOBH](https://gitee.com/opengauss/openGauss-server/issues/I4WOBH?from=project-issue) GUC设置pagewriter\_sleep为360000后恢复默认值2000,重启库失败
-- Gitee From 0e4b53925e1198cc5d7b15414f796b58136741bd Mon Sep 17 00:00:00 2001 From: spaceoddity91719 Date: Tue, 19 Apr 2022 22:06:36 +0800 Subject: [PATCH 3/6] =?UTF-8?q?update:mot=E5=86=85=E5=AD=98=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md | 2 +- .../mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/product/en/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md b/product/en/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md index e0fb87fe..acee5fc1 100644 --- a/product/en/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md +++ b/product/en/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md @@ -58,7 +58,7 @@ Follow the instructions in the **MOT Server Optimization - x86** section. The fo For example, to test TPCC, the **BenchmarkSQL** can be used, as follows - - Download **benchmarksql** from the following link - -- The schema creation scripts in the **benchmarksql** tool need to be adjusted to MOT syntax and unsupported DDLs need to be avoided. The adjusted scripts can be directly downloaded from the following link - . The contents of this tar file includes sql.common.mogdb.mot folder and jTPCCTData.java file as well as a sample configuration file postgresql.conf and a TPCC properties file props.mot for reference. +- The schema creation scripts in the **benchmarksql** tool need to be adjusted to MOT syntax and unsupported DDLs need to be avoided. The adjusted scripts can be directly downloaded from the following link - . The contents of this tar file includes sql.common.mogdb.mot folder and jTPCCTData.java file as well as a sample configuration file postgresql.conf and a TPCC properties file props.mot for reference. 
- Place the sql.common.mogdb.mot folder in the same level as sql.common under run folder and replace the file src/client/jTPCCTData.java with the downloaded java file. - Edit the file runDatabaseBuild.sh under run folder to remove **extraHistID** from **AFTER_LOAD** list to avoid unsupported alter table DDL. - Replace the JDBC driver under lib/postgres folder with the MogDB JDBC driver available from the following link - . diff --git a/product/zh/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md b/product/zh/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md index e9b1daa0..9c1d841b 100644 --- a/product/zh/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md +++ b/product/zh/docs-mogdb/v3.0/administrator-guide/mot-engine/2-using-mot/6-mot-sample-tpcc-benchmark.md @@ -58,10 +58,10 @@ tpm-C指标单位表示为每分钟事务数-C,而C表示TPC-C特定基准。 可以使用BenchmarkSQL测试TPCC,如下所示: - 下载benchmarksql:[https://osdn.net/frs/g_redir.php?m=kent&f=benchmarksql%2Fbenchmarksql-5.0.zip](https://osdn.net/frs/g_redir.php?m=kent&f=benchmarksql/benchmarksql-5.0.zip) -- benchmarksql工具中的模式创建脚本需要调整为MOT语法,避免使用不支持的DDL。下载调整后的脚本:。该tar文件的内容包括sql.common.mogdb.mot文件夹和jTPCCTData.java文件,以及一个示例配置文件postgresql.conf和TPCC属性文件props.mot供参考。 +- benchmarksql工具中的模式创建脚本需要调整为MOT语法,避免使用不支持的DDL。下载调整后的脚本:。该tar文件的内容包括sql.common.mogdb.mot文件夹和jTPCCTData.java文件,以及一个示例配置文件postgresql.conf和TPCC属性文件props.mot供参考。 - 将sql.common.mogdb.mot文件夹放在run文件夹下与sql.common同级的文件夹,用下载的Java文件替换src/client/jTPCCTData.java文件。 - 编辑run文件夹下的runDatabaseBuild.sh文件,将extraHistID从AFTER_LOAD列表中删除,以避免不支持的ALTER表DDL。 -- 将lib/postgres文件夹下的JDBC驱动替换为MogDB JDBC。驱动下载链接:。 +- 将lib/postgres文件夹下的JDBC驱动替换为MogDB JDBC。驱动下载链接:。 在下载的Java文件(与原始文件相比)中所做的唯一更改是注释错误日志打印,以进行序列化和重复键错误。这些错误在MOT中是正常的,因为MOT使用的是乐观并发控制(OCC)机制。 -- Gitee From 2548eb8aa15babe33d12aa4f5257ffbd627d154d Mon Sep 17 00:00:00 2001 From: spaceoddity91719 Date: Wed, 20 Apr 2022 11:13:55 +0800 Subject: [PATCH 4/6] 
update:Development Specifications --- .../dev/1-development-specifications.md | 406 +++++++------ .../dev/1-development-specifications.md | 406 +++++++------ .../dev/1-development-specifications.md | 540 +++++++++--------- .../dev/1-development-specifications.md | 540 +++++++++--------- 4 files changed, 906 insertions(+), 986 deletions(-) diff --git a/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md b/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md index f91625bf..9217c892 100644 --- a/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md +++ b/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md @@ -2,7 +2,7 @@ title: Development Specifications summary: Development Specifications author: Guo Huan -date: 2021-07-21 +date: 2022-04-20 --- # Development Specifications @@ -23,17 +23,11 @@ Although ISO has issued SQL-92, SQL:1999, SQL:2006, and other standards for SQL, This specification emphasizes practicability and operability. According to the common problems and mistakes easily made by developers in the coding process, detailed and clear specifications and constraints are carried out on all aspects of code writing. It mainly includes the following content: - Naming specification - - Design specification - - Syntax specification - - Optimization-related specification - - PG compatibility -- Commonly used functions - In addition, specific examples are given for each detailed rule of the specification. ### Application Scope @@ -46,30 +40,24 @@ This specification applies to MogDB 1.1.0 and later versions. The unified standards for naming database objects, such as database, schema, table, column, view, index, constraint, sequence, function, trigger, etc. are as follows: -- It is advised to use a combination of lowercase letters, numbers, and underscores. - +- The length cannot exceed 63 characters. - It is advised to use meaningful English vocabularies. 
+- It is advised to use a combination of lowercase letters, numbers, and underscores. - It is not advised to use double quotation marks (") unless it must contain special characters such as uppercase letters or spaces. - -- The length cannot exceed 63 characters. - - It is not advised to start with PG, GS (to avoid confusion with the system DB object), and it is not advised to start with a number. - -- It is forbidden to use reserved words. Refer to official documents for reserved keywords. - +- It is forbidden to use [reserved words](2-keywords). Refer to official documents for reserved keywords. - The number of columns that a table can contain varies from 250 to 1600 depending on the field type. ### Temporary and Backup Object Naming -- It is recommended to add a date to the names of temporary or backup database objects (such as table), for example, dba.trade_record_2020_12_08 (where dba is the DBA-specific schema, trade_record is the table name, and 2020_12_08 is the backup date). +- It is recommended to add a date to the names of temporary or backup database objects (such as table), for example, `dba.trade_record_1970_01_01`(where dba is the DBA-specific schema, trade_record is the table name, and 1970_01_01 is the backup date). ### Tablespace Naming - The user tablespace of the database is represented by **ts_\**, where the **tablespace name** contains the following two categories: 1. Data space: For the user's default tablespace, it is represented by **default**. For other tablespaces, it is represented according to the category of the tables hosted on the tablespace. For example, the table that stores code is represented by **code**. The table that stores customer information is represented by **customer**. Try to use one tablespace to host the tables of that category. If a table is particularly large, consider using a separate tablespace. 2. Index space: add **idx_** in front of the name of the corresponding data tablespace. 
For example, the index space for the user's default tablespace is represented by **ts_idx_default**. For index tablespace of code table, use **ts_idx_code**. -- The tablespace name is prohibited to start with **PG_**. ### Index Naming @@ -79,21 +67,13 @@ The unified standards for naming database objects, such as database, schema, tab ```sql create unique index on departments(department_id); - CREATE INDEX - \di - +----------+-------------------------------+--------+---------+ - | Schema | Name | Type | Owner | - |----------+-------------------------------+--------+---------| - | mogdb | departments_department_id_idx | index | mogdb | - +----------+-------------------------------+--------+---------+ - SELECT 1 ``` @@ -115,68 +95,78 @@ SELECT 1 - The name should be consistent with its actual function. A verb should be used as a prefix command to cause an action to take place. -Example: The following naming conforms to the specification: + Example: The following naming conforms to the specification: -``` -func_addgroups (Add multiple groups) -func_addgroup (Add one group) -``` + ``` + func_addgroups (Add multiple groups) + func_addgroup (Add one group) + ``` ## Design Specification ### Database Design +- It is recommended to name the database after the business function, which is simple and intuitive. + - The database is preferentially created using the PG compatibility type. -- The database encoding can use only utf8. +- The recommended database encoding is utf8. ### Tablespace Design -- Generally larger tables or indexes use a separate tablespace. +- The frequently used tables and indexes are stored in a separate tablespace, which should be created on a disk with good performance. + +- Tables and indexes that are dominated by historical data or are less active can be stored in tablespaces with poor disk performance. + +- Tables and indexes can be stored separately in different tablespaces. 
-- The objects for which high frequency insert statements need to be run are divided into a group and stored in the corresponding tablespace. +- Tablespaces can also be divided by database, by schema, or by business function. -- The objects added, deleted, and modified are divided into groups and stored in the corresponding tablespace. +- Each database/schema corresponds to a tablespace and a corresponding index tablespace. -- Tables and indexes are stored in separate tablespaces. +### Schema Design -- In principle, each schema corresponds to a tablespace and a corresponding index tablespace; each large table under a schema corresponds to a separate tablespace and index tablespace. +- When you perform a user creation under a database, a schema with the same name will be created under that database by default. +- It is not recommended to create database objects under the default public schema. +- Create a schema that is different from the username for the business to use. ### Table Design -- When designing a table structure, you should plan well to avoid adding fields frequently, or modifying field types or lengths. +- When designing the table structure, it should be planned to avoid adding fields frequently or modifying field types or lengths. -- You must add comment information to the table, and make sure that the table name matches the comment information. +- Comment information must be added to the table, with the table name matching the comment information. -- It is forbidden to use the **unlogged** keyword to create a new table. By default, a non-compressed row-based table is created. +- The use of the unlogged/ temp/temporary keyword to create business tables is prohibited. -- When each table is created, you must specify the tablespace where it is located. Do not use the default tablespace to prevent the table from being built on the system tablespace and thereby causing performance problems. 
For data tables with busy transactions, they must be stored in a dedicated tablespace. +- The data type must be strictly consistent for the fields that are used as join relationships between tables to avoid indexes not working properly. -- The data types of the fields used for the connection relationship between the tables must be strictly consistent to avoid the inability of the index to be used normally. +- It is forbidden to use VARCHAR or other character types to store date values, and if used, operations cannot be done on this field and need to be strictly defined in the data specification. -- It is forbidden to use **VARCHAR** or other character types to store date values. If it is used, operations cannot be performed on this field, and it needs to be strictly defined in the data specification. +- For astore tables with frequent updates, it is recommended to specify the table fillfactor=85 when building the table to reserve space for HOT. -- The field must be added with a comment that can clearly indicate its meaning, and the description of each state value must be clearly listed in the comment of the state field. +- Tables used for frequent updates should be placed separately in a tablespace with good storage performance. -- For frequently updated tables, it is advised to specify **fillfactor=85** during table creation, and reserve 15% of the space on each page for HOT updates. +- It is recommended to consider partitioning for tables with data volume over billion or occupying more than 10GB on disk. -- The data type defined by the field in the table structure is consistent with that in the application, and the field collation rules between tables are consistent to avoid errors or inability to use indexes. +- The data types defined in the fields in the table structure are consistent with those defined in the application, and the field proofreading rules are consistent between tables to avoid error reporting or the inability to use indexes. 
- Note: For example, the data type of the **user_id** field of table A is defined as **varchar**, but the SQL statement is **where user_id=1234;** + > Note: For example, the data type of the **user_id** field of table A is defined as **varchar**, but the SQL statement is **where user_id=1234;** ### Partitioned Table Design - The number of partitioned tables is not recommended to exceed 1000. -- The primary key or unique index must contain the partition key. +- Partitioned tables can be selected with different tablespaces by frequency of use. -- For tables with a relatively large amount of data, they should be partitioned according to the properties of the table data to get a better performance. +- The primary key or unique index must contain partitioned keys. + +- For tables with larger data volume, partition according to the attributes of table data to get better performance. - To convert a normal table into a partitioned table, you need to create a new partitioned table, and then import the data from the normal table into the newly created partitioned table. Therefore, when you initially design the table, please plan in advance whether to use partitioned tables according to your business. -- For businesses with regular historical data deletion needs, it is recommended to partition the tables by time and not use the **DELETE** operation when deleting, but **DROP** or **TRUNCATE** the corresponding table. +- It is recommended that for businesses with regular historical data deletion needs, the tables are partitioned by time, and when deleting, do not use the DELETE operation, but DROP or TRUNCATE the corresponding table. -- It is not recommended to use a global index in a partitioned table, because the partition maintenance operation may cause the global index to fail and make it difficult to maintain. 
+- It is not recommended to use global indexes in partitioned tables, because doing partition maintenance operations may cause global indexes to fail, making it difficult to maintain. #### Use of Partitioned Table @@ -359,52 +349,66 @@ DROP TABLESPACE ### Column Design -- It is recommended to avoid using character types when numeric types can be used. +- Avoid duplication of column names with system tables. + +- Field meanings and data types should be consistent with the program code design. + +- All fields must have comment information added. -- It is recommended to avoid using **char(N)** if you can use **varchar(N)**, and avoid using **text** and **varchar** if you can use **varchar(N)**. +- Do not use character types when you can use numeric types. -- Only **char(N)**, **varchar(N)** and **text** character types are allowed. +- It is forbidden to store date data in character types. -- The newly created MogDB database is compatible with Oracle by default, and the **not null** constraint does not support empty strings. Empty strings will be converted to **null** by default. Databases compatible with the PG mode will not have this problem. +- Use timestamptz for time type fields. -- It is recommended to use **timestamp with time zone (timestamptz)** instead of **timestamp without time zone**. +- Try to require not null for fields and provide default values for fields. -- It is recommended to use **NUMERIC (precision, scale)** to store currency amounts and other values that require precise calculations, but not to use **real**, **double precision**. +- MogDB new database is compatible with oracle by default, not null constraint does not allow to pass empty string, empty string will be converted to null by default, compatible with PG mode database will not have this problem. ### Sequence Design -- It is forbidden to manually add sequences related to the table. 
+- Manual creation of table-related sequences is prohibited and should be specified in the serial/bigserial type way. -- A sequence is created by specifying the **serial** or **bigserial** type of the column when a table is created. +- It is recommended to set the step size of the sequence to 1. -- The sequence should be consistent with the variable definition type and range in the code to prevent data from being unable to be inserted. +- It is not recommended to set minvalue and maxvalue. + +- It is not recommended to set cache, the serial number is not consecutive after setting cache. + +- It is prohibited to turn on cycle. + +- Serial should be consistent with the type and range of variable definition in the code to prevent the inability to insert data. ### Constraint Design #### Primary Key Constraint - Each table must include a primary key. + - It is not recommended that the name of the primary key has the service meaning, such as identification certificate or country name although the name is unique. -- It is recommended that a primary key is written as id serial primary key or id bigserial primary key. + +- It is recommended that a primary key is written as `id serial primary key` or `id bigserial primary key`. + - It is recommended that the primary key in a large-sized table can be written as follows, which is easy to maintain later. -```sql -create table test(id serial not null ); -create unique index CONCURRENTLY ON test (id); -``` + ```sql + create table test(id serial not null ); + create unique index CONCURRENTLY ON test (id); + ``` #### Unique Constraint -Apart from the primary key, unique constraint is needed. You can create a unique index with uk_ as the prefix to create unique constraint. +Apart from the primary key, unique constraint is needed. You can create a unique index with **uk_** as the prefix to create unique constraint. #### Foreign Key Constraint - You'd better create foreign key constraints for a table with foreign key relationship.
+- The use of foreign keys is not recommended for systems with high performance requirements and security under your control. - When using the foreign key, you must set the action of the foreign key, such as cascade, set null, or set default. #### Non-Empty Column -- All non-empty columns must be clearly marked as NOT NULL during database creation. After the database is used, no change can be performed. Additionally, you need to pay attention to the difference of the query results between NULL and "": null will be converted to NULL while "" does not display any character. +- All non-null columns must have the not null constraint added #### Check Constraint @@ -412,21 +416,20 @@ Apart from the primary key, unique constraint is needed. You can create a unique ### Index Design -- MogDB provides the row-store and column-store tables. The row-store table supports the btree (default), gin, and gist index types. The column-store table supports the Psort (default), btree, and gin index types. -- It is recommended that the CONCURRENTLY parameter is added when you create or drop an index. This can achieve concurrency when data is written into a table. The column-store, partition, and temporary tables do not support index created CONCURRENTLY. -- It is recommended that "create index CONCURRENTLY" and "drop index CONCURRENTLY" are used to maintain the related indexes of a table whose columns included in the indexes are frequently updated and deleted. -- It is recommended that unique index is used to replace unique constraints, facilitating follow-up maintenance. -- It is recommended that a joint index of multiple fields are created based on data distribution for a high-frequency query in which there are multiple fields and conditions in the where statement. -- Each table can include five indexes at most. -- Deep analysis is required for creation of composite indexes. - - The first field in a composite index needs to be correctly chosen. 
Generally, it has good selectivity and is a common field in the where clause. - - If several fields in a composite index are usually presented in a where clause and linked with AND, and single-field query is less or even not involved, you can create a composite index. Otherwise, you can create a single-field index. - - If several fields in a composite index are usually presented in a where clause individually, they can be divided into multiple single-field indexes. - - If both single-field index and composite index with the single field as its first column, the single-field index can be deleted. - - Typically, the first field in a composite index cannot be a time field because the time field is used to scan a range. However, when the former fields are scanned by range, the latter fields cannot be used for index filtration. - - A composite index can include four fields at most. -- For a table with the number of write times significantly greater than that of read times, you'd better not create too many indexes. -- Unused indexes and duplicated indexes should be deleted so that the execution plan and database performance are not affected. +- The number of table indexes for frequent DML operations is not recommended to exceed 5. +- Add concurrently parameter when create/drop index. +- Virtual indexes can be used to determine the validity of indexes before actually creating them. +- Create indexes for fields that frequently appear after the keywords order by, group by, and distinct. +- Fields that are often used as query selections to create indexes. +- Indexes on attributes that are often used as table joins. +- The number of fields in a composite index is not recommended to exceed 3. +- Composite indexes should have one field that is a common search condition. +- The first field of a composite index should not have a single-field index. +- For tables where data is rarely updated and only a few of the fields are frequently queried, consider using covering indexes.
+- Do not create indexes on fields that have a large number of identical fetch values. +- It is recommended to use unique index instead of unique constraints for subsequent maintenance. +- It is recommended to build compound indexes with multiple fields for high frequency queries with multiple fields and conditions in where, with reference to the data distribution. +- Useless indexes and duplicate indexes should be deleted to avoid negative impact on the execution plan and database performance. ### View Design @@ -448,166 +451,142 @@ Apart from the primary key, unique constraint is needed. You can create a unique ### About NULL -- Note: Check whether it is null or is not null. -- Note: The values of the boolean type can be true, false, and NULL. -- Note: Pay attention to that the NOT IN set includes some NULL elements. - -```sql -mogdb=# SELECT * FROM (VALUES(1),(2)) v(a) ; a - -\--- - - 1 - - 2 - -(2 rows) - -mogdb=# select 1 NOT IN (1,NULL); - -?column? - -\--------- - -f +- Description: `NULL` judgment: `IS NULL`, `IS NOT NULL`. +- Description: Beware of `boolean` types taking the values `true`, `false`, `NULL`. -(1 row) +- Description: Beware of `NOT IN` collections with `NULL` elements. -mogdb=# select 2 NOT IN (1,NULL); +- Recommendation: Use `count(1)` or `count(*)` to count rows, but not `count(col)` to count rows, because `NULL` values will not be counted. -?column? +- Rule: When `count(multi-column names)`, the multi-column names must be enclosed in parentheses, e.g. `count( (col1,col2,col3) )`. -\--------- +- Note: With multi-column `count`, the row is counted even if all columns are `NULL`, so the effect is the same as `count(*)`. -(1 row) - -mogdb=# SELECT * FROM (VALUES(1),(2)) v(a) WHERE a NOT IN (1, NULL); a - -\--- - -(0 rows) -``` - -- Suggestion: It is recommended that count(1) or count(\*) is used to count the number of rows. count(col) is not used to count the number of rows because the NULL value is not counted. 
-- Rule: For count(names of multiple columns), the names of multiple columns must be enclosed in brackets, for example count((col1,col2,col3)). -- Note: For count (names of multiple columns), even if the values of all columns are null, the columns will also be counted. Therefore, the calculating result of count(names of multiple columns) is consistent with that of count(\*). -- Note: count(distinct col) is used to count the number of values that are distinct from each other and not null. - -count(distinct (col1,col2,...)) is used to calculate the unique value of those of all columns where NULL is counted. Additionally, two NULL values are considered the same. +- Note: `count(distinct col)` counts the number of non-`NULL` non-repeats of a column, `NULL` is not counted; `count(distinct (col1,col2,...) )` counts the unique values of multiple columns, `NULL` is counted, while `NULL` and `NULL` are considered the same. - Note: Distinction between count and sum of NULL -```sql -select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; - -count | count | sum - --------+-------+----- - - 1 | 0 | - -(1 row) -``` + ```sql + select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; + count | count | sum + -------+-------+----- + 1 | 0 | + (1 row) + ``` - Check whether two values are the same (NULL is considered as the same value). -```sql -select null is distinct from null; - -?column? - -\--------- - -f - -(1 row) - -select null is distinct from 1; + ```sql + select null is distinct from null; + ?column? + \--------- + f + (1 row) + + select null is distinct from 1; + ?column? + \--------- + t + (1 row) + + select null is not distinct from null; + ?column? + \--------- + t + (1 row) + + select null is not distinct from 1; + ?column? + \--------- + f + (1 row) + ```
- -\--------- - -t +### About Invalid Indexes -(1 row) +- During SQL statement writing, functions and expressions are usually used in query operations. It is not recommended that functions and expressions are used in condition columns. Using a function or expression in a condition column will make indexes of the condition column unused, thereby affecting the SQL query efficiency. It is recommended that functions or expressions are used in condition values. For example, -select null is not distinct from null; + ```sql + select name from tab where id+100>1000; + ``` -?column? + This statement can be changed to the following: -\--------- + ```sql + select name from tab where id>1000-100; + ``` -t +- Do not use left fuzzy query. For example, -(1 row) + ```sql + select id from tab where name like '%ly'; + ``` -select null is not distinct from 1; +- Do not use the negative query, such as not in/like. For example, -?column? + ```sql + select id from tab where name not in ('ly','ty'); + ``` -\--------- +### Ensuring That All Variables and Parameters Are Used -f +- Declare-variable also generates certain system overhead and makes code look loose. If some variables are not used in compilation, they will report alarms. Make sure that no any alarm is reported. -(1 row) -``` +## Query Operations -### About Invalid Indexes +### DDL Operation -- During SQL statement writing, functions and expressions are usually used in query operations. It is not recommended that functions and expressions are used in condition columns. Using a function or expression in a condition column will make indexes of the condition column unused, thereby affecting the SQL query efficiency. It is recommended that functions or expressions are used in condition values. 
For example, +- Any DDL operations on existing tables are prohibited during peak business periods - `select name from tab where id+100>1000;` +- All production DDL operations must be verified by the development test environment - This statement can be changed to the following: +- Concurrently should be used when maintaining indexes - `select name from tab where id>1000-100;` +- pg_repack should be used instead of vacuum full to rebuild the table -- Do not use left fuzzy query. For example, +- When adding fields with default values to a large table, it should be split into three parts: adding fields, filling default values and adding non-null constraints, such as breaking `alter table t add column col datatype not null default xxx;` into the following, to avoid too long a table lock caused by filling `default` values - `select id from tab where name like '%ly';` + ```sql + alter table t add column col datatype ; + alter table t alter column col set default xxx; + update table t set column= DEFAULT where id in ( select id from t where column is null limit + 1000 ) ; \watch 3 + alter table t alter column col set not null; + ``` -- Do not use the negative query, such as not in/like. For example, +### DML Operation - `select id from tab where name not in ('ly','ty');` +- The SQL statement for updating data is prohibited to appear `where 1=1` +- The amount of data operated by a single DML statement should not exceed 100,000 -### Ensuring That All Variables and Parameters Are Used +- When clearing the data in the table, `truncate` should be used -- Declare-variable also generates certain system overhead and makes code look loose. If some variables are not used in compilation, they will report alarms. Make sure that no any alarm is reported. +- For risky operations, you should open the transaction and confirm it before committing. 
-## Query Operations +- The SQL logic in the transaction should be as simple as possible, and the operation should be submitted in time after execution to avoid `idle in transaction` status. -### DDL Operation +- Use `copy` instead of `insert` when importing a large amount of data. -- Database object, especially columns with comments added can facilitate service learning and maintenance. -- DDL sent to DBAs, which is attached with common SQLs, such as SELECT, INSERT, DELETE, and UPDATE, can assist DBAs providing optimization suggestions, including creating index CONCURRENTLY. -- When columns need to be added to a large-sized table, "alter table t add column col datatype not null default xxx" can be processed as follows. This can prevent the table from being locked due to long time for filling in the default values. +- Consider deleting indexes before importing data, and rebuild them after importing. -```sql -alter table t add column col datatype ; +### DQL Operation -alter table t alter column col set default xxx; +- Prohibit the use of `select *`, apply the specific required field substitution +- Prohibit the use of `where 1=1` to avoid full table scan or Cartesian product -update table t set column= DEFAULT where id in ( select id from t where column is null limit +- The search condition value should be consistent with the field type to prevent not going to the index -1000 ) ; \watch 3 +- Fields to the left of the equal sign should be consistent with the index, especially conditional or functional indexes -alter table t alter column col set not null; -``` +- Pay attention to the execution plan of slow SQL, if it is not consistent with the expectation, change it as soon as possible -### DML Operation +- Use `count(*)` or `count(1)` to count rows, `count(column)` will not count `null` rows -- When updating a table, the "<>" judgement is needed. 
For example, the statement "update table_a set column_b = c where column_b <> c" indicates that a table needs to be updated to make the value of column b equal to that of column c if the value of column b is not equal to that of column c. In the statement, it is prohibited that the value of column b is equal to that of column c in the where clause. -- A single DML statement can support a maximum of 100 thousand data records. -- When a table needs to be cleared, it is recommended that TRUNCATE is used rather than DELETE. +- Limit the number of `join`, no more than 3 are recommended -### DQL Operation +- Recursive queries need to be limited to prevent infinite loops -- Typically, it is prohibited to use select \*. Selecting only necessary fields can reduce the consumption of including but not limited to network bandwidth and prevent programs from being affected by table structure modification, such as some prepare queries. -- For report-based queries or basic data queries, materialized views can be used to periodically take data snapshots, so that multiple tables are not performed on the same query repeatedly, especially for tables with frequent write operations. -- Window functions can be used for complex statistics queries. -- Make sure that the data type of the associated fields are consistent. It is prohibited to use implicit type conversion. -- The or statements of different fields can be replaced with union. +- For `or` operations, you should use `union all` or `union` instead ### Data Import @@ -632,7 +611,9 @@ alter table t alter column col set not null; During MogDB database creation, the following PG compatibility mode is used: +```sql create database dbnam DBCOMPATIBILITY='PG' encoding=’utf8’; +``` ### Data Type @@ -697,10 +678,10 @@ During the development, MogDB supports only the timestamp[(p )][with time zone] MogDB supports only the JSON type. 
-| Type | PostgreSQL | MogDB | Storage Length | Remarks | -| :---- | :--------- | :-------- | :------------- | :------ | -| json | Supported | Supported | / | | -| jsonb | Supported | / | / | | +| Type | PostgreSQL | MogDB | Storage Length | Remarks | +| :---- | :--------- | :-------------------------- | :------------- | :------ | +| json | Supported | Supported | / | | +| jsonb | Supported | Supported since version 2.1 | / | | ### Keywords @@ -735,16 +716,17 @@ In the following table, **Reserved** indicates that keywords in a database are r ### Implicit Conversion Comparison Table -| Input Type | Target Type | MogDB | -| :---------- | :--------------------------------------------------------- | :-------- | -| bool | int2, int4, int8 | Supported | -| int2 | bool, text, varchar,interval | Supported | -| int4 | bool, int2, text, varchar, interval | Supported | -| int8 | bool, text, varchar | Supported | -| text | int8, int4, int2, float4, float8, date, timestamp, nemeric | Supported | -| float4 | int8, int4, int2, text, varchar | Supported | -| float8 | int8, int4, int2, text, float4, varchar, interval, numeric | Supported | -| date | text, varchar | Supported | -| timestamp | text, varchar | Supported | -| timestamptz | text | Supported | -| numeric | int8, int4, int2, text, varchar, interval | Supported | +| Input Type | Target Type | MogDB | PG | +| :---------- | :----------------------------------------------------------- | :------------------------ | ------------------------------------------------------------ | +| bool | int2, int4, int8 | Supported | int4 is not supported, others are the same | +| int2 | bool, text, bpchar, varchar,interval | Supported (except bpchar) | NA | +| int4 | bool, int2, text, bpchar, varchar, interval | Supported (except bpchar) | bool is not supported, int2 is in assignment, others are the same | +| int8 | bool, text, bpchar, varchar | Supported (except bpchar) | NA | +| text | int8, int4, int2, float4, float8, date, timestamp, 
nemeric | Supported | NA | +| float4 | int8, int4, int2, text, bpchar, varchar | Supported (except bpchar) | First three are in assignment, others are the same | +| float8 | int8, int4, int2, text, float4, bpchar, varchar, interval, numeric | Supported (except bpchar) | int8, int4, int2, float4, numeric are in assignment, others are the same | +| bpchar | int8, int4, date, timestamp, numeric | | | +| date | text, bpchar, varchar | Supported (except bpchar) | NA | +| timestamp | text, varchar | Supported | NA | +| timestamptz | text | Supported | NA | +| numeric | int8, int4, int2, text, bpchar, varchar, interval | Supported (except bpchar) | First three are in assignment, others are the same | diff --git a/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md b/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md index f91625bf..9217c892 100644 --- a/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md +++ b/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md @@ -2,7 +2,7 @@ title: Development Specifications summary: Development Specifications author: Guo Huan -date: 2021-07-21 +date: 2022-04-20 --- # Development Specifications @@ -23,17 +23,11 @@ Although ISO has issued SQL-92, SQL:1999, SQL:2006, and other standards for SQL, This specification emphasizes practicability and operability. According to the common problems and mistakes easily made by developers in the coding process, detailed and clear specifications and constraints are carried out on all aspects of code writing. It mainly includes the following content: - Naming specification - - Design specification - - Syntax specification - - Optimization-related specification - - PG compatibility -- Commonly used functions - In addition, specific examples are given for each detailed rule of the specification. ### Application Scope @@ -46,30 +40,24 @@ This specification applies to MogDB 1.1.0 and later versions. 
The unified standards for naming database objects, such as database, schema, table, column, view, index, constraint, sequence, function, trigger, etc. are as follows: -- It is advised to use a combination of lowercase letters, numbers, and underscores. - +- The length cannot exceed 63 characters. - It is advised to use meaningful English vocabularies. +- It is advised to use a combination of lowercase letters, numbers, and underscores. - It is not advised to use double quotation marks (") unless it must contain special characters such as uppercase letters or spaces. - -- The length cannot exceed 63 characters. - - It is not advised to start with PG, GS (to avoid confusion with the system DB object), and it is not advised to start with a number. - -- It is forbidden to use reserved words. Refer to official documents for reserved keywords. - +- It is forbidden to use [reserved words](2-keywords). Refer to official documents for reserved keywords. - The number of columns that a table can contain varies from 250 to 1600 depending on the field type. ### Temporary and Backup Object Naming -- It is recommended to add a date to the names of temporary or backup database objects (such as table), for example, dba.trade_record_2020_12_08 (where dba is the DBA-specific schema, trade_record is the table name, and 2020_12_08 is the backup date). +- It is recommended to add a date to the names of temporary or backup database objects (such as table), for example, `dba.trade_record_1970_01_01`(where dba is the DBA-specific schema, trade_record is the table name, and 1970_01_01 is the backup date). ### Tablespace Naming - The user tablespace of the database is represented by **ts_\**, where the **tablespace name** contains the following two categories: 1. Data space: For the user's default tablespace, it is represented by **default**. For other tablespaces, it is represented according to the category of the tables hosted on the tablespace. 
For example, the table that stores code is represented by **code**. The table that stores customer information is represented by **customer**. Try to use one tablespace to host the tables of that category. If a table is particularly large, consider using a separate tablespace. 2. Index space: add **idx_** in front of the name of the corresponding data tablespace. For example, the index space for the user's default tablespace is represented by **ts_idx_default**. For index tablespace of code table, use **ts_idx_code**. -- The tablespace name is prohibited to start with **PG_**. ### Index Naming @@ -79,21 +67,13 @@ The unified standards for naming database objects, such as database, schema, tab ```sql create unique index on departments(department_id); - CREATE INDEX - \di - +----------+-------------------------------+--------+---------+ - | Schema | Name | Type | Owner | - |----------+-------------------------------+--------+---------| - | mogdb | departments_department_id_idx | index | mogdb | - +----------+-------------------------------+--------+---------+ - SELECT 1 ``` @@ -115,68 +95,78 @@ SELECT 1 - The name should be consistent with its actual function. A verb should be used as a prefix command to cause an action to take place. -Example: The following naming conforms to the specification: + Example: The following naming conforms to the specification: -``` -func_addgroups (Add multiple groups) -func_addgroup (Add one group) -``` + ``` + func_addgroups (Add multiple groups) + func_addgroup (Add one group) + ``` ## Design Specification ### Database Design +- It is recommended to name the database after the business function, which is simple and intuitive. + - The database is preferentially created using the PG compatibility type. -- The database encoding can use only utf8. +- The recommended database encoding is utf8. ### Tablespace Design -- Generally larger tables or indexes use a separate tablespace. 
+- The frequently used tables and indexes are stored in a separate tablespace, which should be created on a disk with good performance. + +- Tables and indexes that are dominated by historical data or are less active can be stored in tablespaces with poor disk performance. + +- Tables and indexes can be stored separately in different tablespaces. -- The objects for which high frequency insert statements need to be run are divided into a group and stored in the corresponding tablespace. +- Tablespaces can also be divided by database, by schema, or by business function. -- The objects added, deleted, and modified are divided into groups and stored in the corresponding tablespace. +- Each database/schema corresponds to a tablespace and a corresponding index tablespace. -- Tables and indexes are stored in separate tablespaces. +### Schema Design -- In principle, each schema corresponds to a tablespace and a corresponding index tablespace; each large table under a schema corresponds to a separate tablespace and index tablespace. +- When you perform a user creation under a database, a schema with the same name will be created under that database by default. +- It is not recommended to create database objects under the default public schema. +- Create a schema that is different from the username for the business to use. ### Table Design -- When designing a table structure, you should plan well to avoid adding fields frequently, or modifying field types or lengths. +- When designing the table structure, it should be planned to avoid adding fields frequently or modifying field types or lengths. -- You must add comment information to the table, and make sure that the table name matches the comment information. +- Comment information must be added to the table, with the table name matching the comment information. -- It is forbidden to use the **unlogged** keyword to create a new table. By default, a non-compressed row-based table is created. 
+- The use of the unlogged/ temp/temporary keyword to create business tables is prohibited. -- When each table is created, you must specify the tablespace where it is located. Do not use the default tablespace to prevent the table from being built on the system tablespace and thereby causing performance problems. For data tables with busy transactions, they must be stored in a dedicated tablespace. +- The data type must be strictly consistent for the fields that are used as join relationships between tables to avoid indexes not working properly. -- The data types of the fields used for the connection relationship between the tables must be strictly consistent to avoid the inability of the index to be used normally. +- It is forbidden to use VARCHAR or other character types to store date values, and if used, operations cannot be done on this field and need to be strictly defined in the data specification. -- It is forbidden to use **VARCHAR** or other character types to store date values. If it is used, operations cannot be performed on this field, and it needs to be strictly defined in the data specification. +- For astore tables with frequent updates, it is recommended to specify the table fillfactor=85 when building the table to reserve space for HOT. -- The field must be added with a comment that can clearly indicate its meaning, and the description of each state value must be clearly listed in the comment of the state field. +- Tables used for frequent updates should be placed separately in a tablespace with good storage performance. -- For frequently updated tables, it is advised to specify **fillfactor=85** during table creation, and reserve 15% of the space on each page for HOT updates. +- It is recommended to consider partitioning for tables with data volume over billion or occupying more than 10GB on disk. 
-- The data type defined by the field in the table structure is consistent with that in the application, and the field collation rules between tables are consistent to avoid errors or inability to use indexes. +- The data types defined in the fields in the table structure are consistent with those defined in the application, and the field proofreading rules are consistent between tables to avoid error reporting or the inability to use indexes. - Note: For example, the data type of the **user_id** field of table A is defined as **varchar**, but the SQL statement is **where user_id=1234;** + > Note: For example, the data type of the **user_id** field of table A is defined as **varchar**, but the SQL statement is **where user_id=1234;** ### Partitioned Table Design - The number of partitioned tables is not recommended to exceed 1000. -- The primary key or unique index must contain the partition key. +- Partitioned tables can be selected with different tablespaces by frequency of use. -- For tables with a relatively large amount of data, they should be partitioned according to the properties of the table data to get a better performance. +- The primary key or unique index must contain partitioned keys. + +- For tables with larger data volume, partition according to the attributes of table data to get better performance. - To convert a normal table into a partitioned table, you need to create a new partitioned table, and then import the data from the normal table into the newly created partitioned table. Therefore, when you initially design the table, please plan in advance whether to use partitioned tables according to your business. -- For businesses with regular historical data deletion needs, it is recommended to partition the tables by time and not use the **DELETE** operation when deleting, but **DROP** or **TRUNCATE** the corresponding table. 
+- It is recommended that for businesses with regular historical data deletion needs, the tables are partitioned by time, and when deleting, do not use the DELETE operation, but DROP or TRUNCATE the corresponding table. -- It is not recommended to use a global index in a partitioned table, because the partition maintenance operation may cause the global index to fail and make it difficult to maintain. +- It is not recommended to use global indexes in partitioned tables, because doing partition maintenance operations may cause global indexes to fail, making it difficult to maintain. #### Use of Partitioned Table @@ -359,52 +349,66 @@ DROP TABLESPACE ### Column Design -- It is recommended to avoid using character types when numeric types can be used. +- Avoid duplication of column names with system tables. + +- Field meanings and data types should be consistent with the program code design. + +- All fields must have comment information added. -- It is recommended to avoid using **char(N)** if you can use **varchar(N)**, and avoid using **text** and **varchar** if you can use **varchar(N)**. +- Do not use character types when you can use numeric types. -- Only **char(N)**, **varchar(N)** and **text** character types are allowed. +- It is forbidden to store date data in character types. -- The newly created MogDB database is compatible with Oracle by default, and the **not null** constraint does not support empty strings. Empty strings will be converted to **null** by default. Databases compatible with the PG mode will not have this problem. +- Use timestamptz for time type fields. -- It is recommended to use **timestamp with time zone (timestamptz)** instead of **timestamp without time zone**. +- Try to require not null for fields and provide default values for fields. -- It is recommended to use **NUMERIC (precision, scale)** to store currency amounts and other values that require precise calculations, but not to use **real**, **double precision**. 
+- MogDB new database is compatible with oracle by default, not null constraint does not allow to pass empty string, empty string will be converted to null by default, compatible with PG mode database will not have this problem. ### Sequence Design -- It is forbidden to manually add sequences related to the table. +- Manual creation of table-related sequences is prohibited and should be specified in the serial/bigserial type way. -- A sequence is created by specifying the **serial** or **bigserial** type of the column when a table is created. +- It is recommended to set the step size of the sequence to 1. -- The sequence should be consistent with the variable definition type and range in the code to prevent data from being unable to be inserted. +- It is not recommended to set minvalue and maxvalue. + +- It is not recommended to set cache, the serial number is not consecutive after setting cache. + +- It is prohibited to turn on cycle. + +- Serial should be consistent with the type and range of variable definition in the code to prevent the inability to insert data. ### Constraint Design #### Primary Key Constraint - Each table must include a primary key. + - It is not recommended that the name of the primary key has the service meaning, such as identification certificate or country name although the name is unique. + -- It is recommended that a primary key is written as id serial primary key or id bigserial primary key. + +- It is recommended that a primary key is written as `id serial primary key` or `id bigserial primary key`. + - It is recommended that the primary key in a large-sized table can be written as follows, which is easy to maintain later. -```sql -create table test(id serial not null ); -create unique index CONCURRENTLY ON test (id); -``` + ```sql + create table test(id serial not null ); + create unique index CONCURRENTLY ON test (id); + ``` #### Unique Constraint -Apart from the primary key, unique constraint is needed.
You can create a unique index with uk_ as the prefix to create unique constraint. +Apart from the primary key, unique constraint is needed. You can create a unique index with **uk_** as the prefix to create unique constraint. #### Foreign Key Constraint - You'd better create foreign key constraints for a table with foreign key relationship. +- The use of foreign keys is not recommended for systems with high performance requirements and security under your control. - When using the foreign key, you must set the action of the foreign key, such as cascade, set null, or set default. #### Non-Empty Column -- All non-empty columns must be clearly marked as NOT NULL during database creation. After the database is used, no change can be performed. Additionally, you need to pay attention to the difference of the query results between NULL and "": null will be converted to NULL while "" does not display any character. +- All non-null columns must have the not null constraint added #### Check Constraint @@ -412,21 +416,20 @@ Apart from the primary key, unique constraint is needed. You can create a unique ### Index Design -- MogDB provides the row-store and column-store tables. The row-store table supports the btree (default), gin, and gist index types. The column-store table supports the Psort (default), btree, and gin index types. -- It is recommended that the CONCURRENTLY parameter is added when you create or drop an index. This can achieve concurrency when data is written into a table. The column-store, partition, and temporary tables do not support index created CONCURRENTLY. -- It is recommended that "create index CONCURRENTLY" and "drop index CONCURRENTLY" are used to maintain the related indexes of a table whose columns included in the indexes are frequently updated and deleted. -- It is recommended that unique index is used to replace unique constraints, facilitating follow-up maintenance. 
-- It is recommended that a joint index of multiple fields are created based on data distribution for a high-frequency query in which there are multiple fields and conditions in the where statement. -- Each table can include five indexes at most. -- Deep analysis is required for creation of composite indexes. - - The first field in a composite index needs to be correctly chosen. Generally, it has good selectivity and is a common field in the where clause. - - If several fields in a composite index are usually presented in a where clause and linked with AND, and single-field query is less or even not involved, you can create a composite index. Otherwise, you can create a single-field index. - - If several fields in a composite index are usually presented in a where clause individually, they can be divided into multiple single-field indexes. - - If both single-field index and composite index with the single field as its first column, the single-field index can be deleted. - - Typically, the first field in a composite index cannot be a time field because the time field is used to scan a range. However, when the former fields are scanned by range, the latter fields cannot be used for index filtration. - - A composite index can include four fields at most. -- For a table with the number of write times significantly greater than that of read times, you'd better not create too many indexes. -- Unused indexes and duplicated indexes should be deleted so that the execution plan and database performance are not affected. +- The number of table indexes for frequent DML operations is not recommended to exceed 5. +- Add concurrently parameter when create/drop index. +- Virtual indexes can be used to determine the validity of indexes before actually creating them. +- Create indexes for fields that frequently appear after the keywords order by, group by, and distinct. +- Fields that are often used as query selections to create indexes.
+- Indexes on attributes that are often used as table joins. +- The number of fields in a composite index is not recommended to exceed 3. +- Composite indexes should have one field that is a common search condition. +- The first field of a composite index should not have a single-field index. +- For tables where data is rarely updated and only a few of the fields are frequently queried, consider using index overrides. +- Do not create indexes on fields that have a large number of identical fetch values. +- It is recommended to use unique index instead of unique constraints for subsequent maintenance. +- It is recommended to build compound indexes with multiple fields for high frequency queries with multiple fields and conditions in where, with reference to the data distribution. +- Useless indexes and duplicate indexes should be deleted to avoid negative impact on the execution plan and database performance. ### View Design @@ -448,166 +451,142 @@ Apart from the primary key, unique constraint is needed. You can create a unique ### About NULL -- Note: Check whether it is null or is not null. -- Note: The values of the boolean type can be true, false, and NULL. -- Note: Pay attention to that the NOT IN set includes some NULL elements. - -```sql -mogdb=# SELECT * FROM (VALUES(1),(2)) v(a) ; a - -\--- - - 1 - - 2 - -(2 rows) - -mogdb=# select 1 NOT IN (1,NULL); - -?column? - -\--------- - -f +- Description: `NULL` judgment: `IS NULL`, `IS NOT NULL`. +- Description: Beware of `boolean` types taking the values `true`, `false`, `NULL`. -(1 row) +- Description: Beware of `NOT IN` collections with `NULL` elements. -mogdb=# select 2 NOT IN (1,NULL); +- Recommendation: Use `count(1)` or `count(*)` to count rows, but not `count(col)` to count rows, because `NULL` values will not be counted. -?column? +- Rule: When `count(multi-column names)`, the multi-column names must be enclosed in parentheses, e.g. `count( (col1,col2,col3) )`. 
-\--------- +- Note: With multi-column `count`, the row is counted even if all columns are `NULL`, so the effect is the same as `count(*)`. -(1 row) - -mogdb=# SELECT * FROM (VALUES(1),(2)) v(a) WHERE a NOT IN (1, NULL); a - -\--- - -(0 rows) -``` - -- Suggestion: It is recommended that count(1) or count(\*) is used to count the number of rows. count(col) is not used to count the number of rows because the NULL value is not counted. -- Rule: For count(names of multiple columns), the names of multiple columns must be enclosed in brackets, for example count((col1,col2,col3)). -- Note: For count (names of multiple columns), even if the values of all columns are null, the columns will also be counted. Therefore, the calculating result of count(names of multiple columns) is consistent with that of count(\*). -- Note: count(distinct col) is used to count the number of values that are distinct from each other and not null. - -count(distinct (col1,col2,...)) is used to calculate the unique value of those of all columns where NULL is counted. Additionally, two NULL values are considered the same. +- Note: `count(distinct col)` counts the number of non-`NULL` non-repeats of a column, `NULL` is not counted; `count(distinct (col1,col2,...) )` counts the unique values of multiple columns, `NULL` is counted, while `NULL` and `NULL` are considered the same. - Note: Distinction between count and sum of NULL -```sql -select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; - -count | count | sum - --------+-------+----- - - 1 | 0 | - -(1 row) -``` + ```sql + select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; + count | count | sum + -------+-------+----- + 1 | 0 | + (1 row) + ``` - Check whether two values are the same (NULL is considered as the same value). -```sql -select null is distinct from null; - -?column?
- -\--------- - -f - -(1 row) - -select null is distinct from 1; + ```sql + select null is distinct from null; + ?column? + \--------- + f + (1 row) + + select null is distinct from 1; + ?column? + \--------- + t + (1 row) + + select null is not distinct from null; + ?column? + \--------- + t + (1 row) + + select null is not distinct from 1; + ?column? + \--------- + f + (1 row) + ``` -?column? - -\--------- - -t +### About Invalid Indexes -(1 row) +- During SQL statement writing, functions and expressions are usually used in query operations. It is not recommended that functions and expressions are used in condition columns. Using a function or expression in a condition column will make indexes of the condition column unused, thereby affecting the SQL query efficiency. It is recommended that functions or expressions are used in condition values. For example, -select null is not distinct from null; + ```sql + select name from tab where id+100>1000; + ``` -?column? + This statement can be changed to the following: -\--------- + ```sql + select name from tab where id>1000-100; + ``` -t +- Do not use left fuzzy query. For example, -(1 row) + ```sql + select id from tab where name like '%ly'; + ``` -select null is not distinct from 1; +- Do not use the negative query, such as not in/like. For example, -?column? + ```sql + select id from tab where name not in ('ly','ty'); + ``` -\--------- +### Ensuring That All Variables and Parameters Are Used -f +- Declare-variable also generates certain system overhead and makes code look loose. If some variables are not used in compilation, they will report alarms. Make sure that no any alarm is reported. -(1 row) -``` +## Query Operations -### About Invalid Indexes +### DDL Operation -- During SQL statement writing, functions and expressions are usually used in query operations. It is not recommended that functions and expressions are used in condition columns. 
Using a function or expression in a condition column will make indexes of the condition column unused, thereby affecting the SQL query efficiency. It is recommended that functions or expressions are used in condition values. For example, +- Any DDL operations on existing tables are prohibited during peak business periods - `select name from tab where id+100>1000;` +- All production DDL operations must be verified by the development test environment - This statement can be changed to the following: +- Concurrently should be used when maintaining indexes - `select name from tab where id>1000-100;` +- pg_repack should be used instead of vacuum full to rebuild the table -- Do not use left fuzzy query. For example, +- When adding fields with default values to a large table, it should be split into three parts: adding fields, filling default values and adding non-null constraints, such as breaking `alter table t add column col datatype not null default xxx;` into the following, to avoid too long a table lock caused by filling `default` values - `select id from tab where name like '%ly';` + ```sql + alter table t add column col datatype ; + alter table t alter column col set default xxx; + update table t set column= DEFAULT where id in ( select id from t where column is null limit + 1000 ) ; \watch 3 + alter table t alter column col set not null; + ``` -- Do not use the negative query, such as not in/like. For example, +### DML Operation - `select id from tab where name not in ('ly','ty');` +- The SQL statement for updating data is prohibited to appear `where 1=1` +- The amount of data operated by a single DML statement should not exceed 100,000 -### Ensuring That All Variables and Parameters Are Used +- When clearing the data in the table, `truncate` should be used -- Declare-variable also generates certain system overhead and makes code look loose. If some variables are not used in compilation, they will report alarms. Make sure that no any alarm is reported. 
+- For risky operations, you should open the transaction and confirm it before committing. -## Query Operations +- The SQL logic in the transaction should be as simple as possible, and the operation should be submitted in time after execution to avoid `idle in transaction` status. -### DDL Operation +- Use `copy` instead of `insert` when importing a large amount of data. -- Database object, especially columns with comments added can facilitate service learning and maintenance. -- DDL sent to DBAs, which is attached with common SQLs, such as SELECT, INSERT, DELETE, and UPDATE, can assist DBAs providing optimization suggestions, including creating index CONCURRENTLY. -- When columns need to be added to a large-sized table, "alter table t add column col datatype not null default xxx" can be processed as follows. This can prevent the table from being locked due to long time for filling in the default values. +- Consider deleting indexes before importing data, and rebuild them after importing. -```sql -alter table t add column col datatype ; +### DQL Operation -alter table t alter column col set default xxx; +- Prohibit the use of `select *`, apply the specific required field substitution +- Prohibit the use of `where 1=1` to avoid full table scan or Cartesian product -update table t set column= DEFAULT where id in ( select id from t where column is null limit +- The search condition value should be consistent with the field type to prevent not going to the index -1000 ) ; \watch 3 +- Fields to the left of the equal sign should be consistent with the index, especially conditional or functional indexes -alter table t alter column col set not null; -``` +- Pay attention to the execution plan of slow SQL, if it is not consistent with the expectation, change it as soon as possible -### DML Operation +- Use `count(*)` or `count(1)` to count rows, `count(column)` will not count `null` rows -- When updating a table, the "<>" judgement is needed. 
For example, the statement "update table_a set column_b = c where column_b <> c" indicates that a table needs to be updated to make the value of column b equal to that of column c if the value of column b is not equal to that of column c. In the statement, it is prohibited that the value of column b is equal to that of column c in the where clause. -- A single DML statement can support a maximum of 100 thousand data records. -- When a table needs to be cleared, it is recommended that TRUNCATE is used rather than DELETE. +- Limit the number of `join`, no more than 3 are recommended -### DQL Operation +- Recursive queries need to be limited to prevent infinite loops -- Typically, it is prohibited to use select \*. Selecting only necessary fields can reduce the consumption of including but not limited to network bandwidth and prevent programs from being affected by table structure modification, such as some prepare queries. -- For report-based queries or basic data queries, materialized views can be used to periodically take data snapshots, so that multiple tables are not performed on the same query repeatedly, especially for tables with frequent write operations. -- Window functions can be used for complex statistics queries. -- Make sure that the data type of the associated fields are consistent. It is prohibited to use implicit type conversion. -- The or statements of different fields can be replaced with union. +- For `or` operations, you should use `union all` or `union` instead ### Data Import @@ -632,7 +611,9 @@ alter table t alter column col set not null; During MogDB database creation, the following PG compatibility mode is used: +```sql create database dbnam DBCOMPATIBILITY='PG' encoding=’utf8’; +``` ### Data Type @@ -697,10 +678,10 @@ During the development, MogDB supports only the timestamp[(p )][with time zone] MogDB supports only the JSON type. 
-| Type | PostgreSQL | MogDB | Storage Length | Remarks | -| :---- | :--------- | :-------- | :------------- | :------ | -| json | Supported | Supported | / | | -| jsonb | Supported | / | / | | +| Type | PostgreSQL | MogDB | Storage Length | Remarks | +| :---- | :--------- | :-------------------------- | :------------- | :------ | +| json | Supported | Supported | / | | +| jsonb | Supported | Supported since version 2.1 | / | | ### Keywords @@ -735,16 +716,17 @@ In the following table, **Reserved** indicates that keywords in a database are r ### Implicit Conversion Comparison Table -| Input Type | Target Type | MogDB | -| :---------- | :--------------------------------------------------------- | :-------- | -| bool | int2, int4, int8 | Supported | -| int2 | bool, text, varchar,interval | Supported | -| int4 | bool, int2, text, varchar, interval | Supported | -| int8 | bool, text, varchar | Supported | -| text | int8, int4, int2, float4, float8, date, timestamp, nemeric | Supported | -| float4 | int8, int4, int2, text, varchar | Supported | -| float8 | int8, int4, int2, text, float4, varchar, interval, numeric | Supported | -| date | text, varchar | Supported | -| timestamp | text, varchar | Supported | -| timestamptz | text | Supported | -| numeric | int8, int4, int2, text, varchar, interval | Supported | +| Input Type | Target Type | MogDB | PG | +| :---------- | :----------------------------------------------------------- | :------------------------ | ------------------------------------------------------------ | +| bool | int2, int4, int8 | Supported | int4 is not supported, others are the same | +| int2 | bool, text, bpchar, varchar,interval | Supported (except bpchar) | NA | +| int4 | bool, int2, text, bpchar, varchar, interval | Supported (except bpchar) | bool is not supported, int2 is in assignment, others are the same | +| int8 | bool, text, bpchar, varchar | Supported (except bpchar) | NA | +| text | int8, int4, int2, float4, float8, date, timestamp, 
nemeric | Supported | NA | +| float4 | int8, int4, int2, text, bpchar, varchar | Supported (except bpchar) | First three are in assignment, others are the same | +| float8 | int8, int4, int2, text, float4, bpchar, varchar, interval, numeric | Supported (except bpchar) | int8, int4, int2, float4, numeric are in assignment, others are the same | +| bpchar | int8, int4, date, timestamp, numeric | | | +| date | text, bpchar, varchar | Supported (except bpchar) | NA | +| timestamp | text, varchar | Supported | NA | +| timestamptz | text | Supported | NA | +| numeric | int8, int4, int2, text, bpchar, varchar, interval | Supported (except bpchar) | First three are in assignment, others are the same | diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md b/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md index 1bb81b51..2c2b12ec 100644 --- a/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md +++ b/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md @@ -23,78 +23,63 @@ date: 2021-04-27 本规范强调实用性、可操作性,根据开发人员在编码过程中常见的问题和易犯的错误,对代码编写的各方面均进行了详细明确的规范和约束。主要包括下列内容: - 命名规范 - - 设计规范 - - 语法规范 - - 优化相关 - - PG兼容性 -- 常用函数 - 除此之外,对规范的每条细则均给出具体的范例。 ### 适用范围 -本规范适用于MogDB/openGauss 1.0.0及以上版本。 +本规范适用于MogDB 1.0.0及以上版本。 ## 命名规范 ### 对象命名统一规范 -数据库对象,如database, schema, table, column, view, index, constraint,sequence, function, trigger等命名统一标准如下: +数据库对象,如database、schema、table、column、view、index、constraint、sequence、function、trigger等命名统一标准如下: -- 建议使用小写字母、数字、下划线的组合 +- 长度不能超过63个字符 - 命名尽量采用富有意义英文词汇 -- 建议不使用双引号即"包围,除非必须包含大写字母或空格等特殊字符 +- 建议使用小写字母、数字、下划线的组合 -- 长度不能超过63个字符 +- 建议不使用双引号即"包围,除非必须包含大写字母或空格等特殊字符 - 不建议以PG、GS开头(避免与系统DB object混淆),不建议以数字开头 -- 禁止使用保留字,保留关键字参考官方文档。 +- 禁止使用[保留字](2-keywords),保留关键字参考官方文档 + +- table能包含的column数目,根据字段类型的不同,数目在250到1600之间 -- table能包含的column数目,根据字段类型的不同,数目在 250 到 1600 之间 ### 临时及备份对象命名 -- 临时或备份的数据库对象名,如table,建议添加日期,如dba.trade_record_2020_12_08 (其中dba 
为DBA专用schema,trade_record为表名,2020_12_08为备份日期)。 +- 临时或备份的数据库对象名,如table,建议添加日期,如`dba.trade_record_1970_01_01`(其中dba 为DBA专用schema,trade_record为表名,1970_01_01为备份日期)。 ### 表空间命名 -- 数据库的用户表空间用ts_<表空间名>来表现,其中,表空间名分为: - 1. 数据空间:对于用户的缺省表空间,用default来表现。对于其他的表空间,根据寄存在表空间上的表的类别来表现。如放代码的表,用code来表现。放客户资料的表,用customer来表现。尽量用一个表空间来寄存该类的表。如果某表特殊大,可考虑单独使用一个表空间。 - 2. 索引空间:在相应的数据表空间的名字前加idx_。如对用户缺省表空间的索引空间,用ts_idx_default来表现。对代码表的索引表空间,用ts_idx_code来表现。 - -- 表空间名禁止以"PG_"开头。 +- 数据库的用户表空间用`ts_<表空间名>`来表现,其中,表空间名分为: + 1. 数据空间:对于用户的缺省表空间,用`default`来表现。对于其他的表空间,根据寄存在表空间上的表的类别来表现。如放代码的表,用`code`来表现。放客户资料的表,用`customer`来表现。尽量用一个表空间来寄存该类的表。如果某表特殊大,可考虑单独使用一个表空间。 + 2. 索引空间:在相应的数据表空间的名字前加idx_。如对用户缺省表空间的索引空间,用`ts_idx_default`来表现。对代码表的索引表空间,用`ts_idx_code`来表现。 ### 索引命名 -- Index对象命名规则为: 表名_列名_idx,如student_name_idx,该索引命名方式为MogDB/openGauss数据库在创建索引时没有明确指定索引名称时的默认命名方式。 +- Index对象命名规则为:`表名_列名_idx`,如`student_name_idx`,该索引命名方式为MogDB数据库在创建索引时没有明确指定索引名称时的默认命名方式。 - 因此建议创建索引时,不显式给出index name,使用DBMS默认值。 + 因此建议创建索引时,不显式给出index name,使用DBMS默认值。 ```sql create unique index on departments(department_id); - CREATE INDEX - \di - +----------+-------------------------------+--------+---------+ - | Schema | Name | Type | Owner | - |----------+-------------------------------+--------+---------| - | mogdb | departments_department_id_idx | index | mogdb | - +----------+-------------------------------+--------+---------+ - SELECT 1 ``` @@ -108,7 +93,7 @@ SELECT 1 - 分区表的表名遵循普通表的正常命名规则。 -- 按时间范围分区(每月一个分区),分区名字为PART_YYYYMM。 +- 按时间范围分区(每月一个分区),分区名字为`PART_YYYYMM`。 举例: PART_201901、PART_201902 @@ -116,32 +101,40 @@ SELECT 1 - 名称应与其实际功能保持一致。导致发生某动作应以动词为前缀命令。 -示例:以下命名符合规范: + 示例:以下命名符合规范: -``` -func_addgroups(增加多个群组) -func_addgroup(增加一个群组) -``` + ``` + func_addgroups(增加多个群组) + func_addgroup(增加一个群组) + ``` ## 设计规范 ### database设计 -- 数据库优先使用兼容PG类型的方式来创建。 +- 建议以业务功能来命名数据库,简单直观。 -- 数据库编码只能用utf8。 +- 业务数据库推荐以兼容PG的方式来创建。 + +- 数据库编码推荐用utf8。 ### tablespace设计 -- 一般较大的表或索引单独分配一个tablespace。 +- 频繁使用的表和索引单独存放在一个表空间,此表空间应在性能好的磁盘上创建。 + +- 
以历史数据为主,或活跃度较低的表和索引可以存放在磁盘性能较差的表空间。 -- 高频率insert的对象分成一组,存在对应的tablespace中。 +- 表和索引可以单独存放在不同的表空间。 -- 增、删、改的对象分成一组,存在对应的tablespace中。 +- 表空间也可以按数据库分、按schema分或按业务功能来分。 -- 表和索引分别存于不同的tablespace。 +- 每个database/schema对应一个表空间和一个相应索引表空间。 -- 原则上每个schema对应一个表空间和一个相应索引表空间;每个schema下的大表对应一个单独的表空间和索引表空间。 +### schema设计 + +- 在一个数据库下执行创建用户时,默认会在该数据库下创建一个同名schema。 +- 不建议在默认public schema下创建数据库对象。 +- 创建一个与用户名不同的schema给业务使用。 ### table设计 @@ -149,26 +142,29 @@ func_addgroup(增加一个群组) - 必须为表添加注释信息,表名与注释信息相匹配。 -- 禁止使用unlogged关键字新建表,默认创建非压缩行表。 - -- 每个表在创建时候,必须指定所在的表空间,不要采用默认表空间以防止表建立在系统表空间上导致性能问题。对于事务比较繁忙的数据表,必须存放在专用表空间中。 +- 禁止使用unlogged/ temp/temporary关键字创建业务表。 - 作为表间连接关系的字段,数据类型必须保持严格一致,避免索引无法正常使用。 - 禁止使用VARCHAR或其他字符类型来存储日期值,如果使用,则不能在此字段上做运算,需要在数据规范中严格定义。 -- 字段必须添加能够清楚表示其含义的注释,状态类字段的注释中必须明确列出各状态值的说明。 +- 对于频繁更新的astore表,建议建表时指定表的fillfactor=85,给HOT预留空间。 + +- 频繁更新使用的表应该单独放在存储性能好的表空间。 -- 对于频繁更新的表,建议建表时指定表的fillfactor=85,每页预留15%的空间给HOT更新使用。 +- 数据量超过亿级或占用磁盘超过10GB的表,建议考虑分区。 - 表结构中字段定义的数据类型与应用程序中的定义保持一致,表之间字段校对规则一致,避免报错或无法使用索引的情况发生。 - 说明:比如A表user_id字段数据类型定义为varchar,但是SQL语句查询为 where user_id=1234; + > 说明:比如A表user_id字段数据类型定义为varchar,但是SQL语句查询为`where user_id=1234;` + ### partition table设计 - 分区表的个数不建议超过1000个。 +- 分区表可以按使用频度选择不同的表空间。 + - 主键或唯一索引必须要包含分区键。 - 对于数据量比较大的表,根据表数据的属性进行分区,以得到较好的性能。 @@ -360,26 +356,38 @@ DROP TABLESPACE ### column设计 -- 建议可以采用数值类型的场合,则避免采用字符类型。 +- 避免与系统表的列名重复。 + +- 字段含义及数据类型要与程序代码设计保持一致。 + +- 所有字段必须要添加comment注释信息。 -- 建议可以采用varchar(N) 就避免采用char(N),可以采用varchar(N) 就避免采用text,varchar。 +- 能使用数值类型,就不要使用字符类型。 -- 只允许用char(N)、varchar(N)及text字符类型。 +- 禁止用字符类型存储日期数据。 -- MogDB/openGauss新建数据库默认兼容oracle,not null 约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 +- 时间类型字段统一使用timestamptz。 -- 建议使用timestamp with time zone(timestamptz),而不用timestamp without time zone。 +- 字段尽量要求not null,为字段提供默认值。 + +- MogDB新建数据库默认兼容oracle,not null约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 -- 建议使用NUMERIC(precision, scale)来存储货币金额和其它要求精确计算的数值, 而不建议使用real, double precision。 ### 序列设计 -- 禁止手动添加与表相关的序列。 +- 
禁止手动创建与表相关的序列,应指定serial/bigserial类型方式创建。 + +- 序列的步长建议设置为1。 + +- 不建议设置minvalue和maxvalue。 -- 建表时指定列的serial或bigserial类型的方式来创建序列。 +- 不建议设置cache,设置cache后序列号不连续。 + +- 禁止开启cycle。 - 序列应与代码中变量定义类型及范围一致,防止无法插入数据。 + ### constraint设计 #### 主键约束 @@ -388,14 +396,14 @@ DROP TABLESPACE - 建议不要用有业务含义的名称作为主键,比如身份证或者国家名称,尽管其是unique的。 -- 建议主键的一步到位的写法:id serial primary key 或id bigserial primary key。 +- 建议主键的一步到位的写法:`id serial primary key`或`id bigserial primary key`。 - 建议内容系统中size较大的table主键的等效写法如下,便于后续维护。 -```sql -create table test(id serial not null ); -create unique index CONCURRENTLY ON test (id); -``` + ```sql + create table test(id serial not null ); + create unique index CONCURRENTLY ON test (id); + ``` #### 唯一约束 @@ -405,34 +413,33 @@ create unique index CONCURRENTLY ON test (id); - 存在外键关系的表上尽量创建外键约束。 -- 使用外键时,一定要设置fk的action,例如cascade,set null,set default。 +- 性能要求高而安全性自己控制的系统不建议使用外键。 + +- 使用外键时,一定要设置fk的action,例如cascade、set null、set default。 #### 非空列 -- 所有非空列须在建表之初明确标识“NOT NULL”,上线之后,不再变更。同时需注意 NULL 与空字符‘’的查询结果差别:null 值会转换为 NULL,而空值无字符显示。 +- 所有非空列必须添加not null约束 #### 检查约束 -- 对于字段有检查性约束,一般要求指定check规则。例如:性别、状态等字段。 +- 对于字段有检查性约束,一般要求指定check规则。例如:性别、状态等字段。 ### index设计 -- MogDB/openGauss 提供的index类型: 行存表支持的索引类型:btree(行存表缺省值)、gin、gist。列存表支持的索引类型:Psort(列存表缺省值)、btree、gin。 -- 建议create 或 drop index 时,加 CONCURRENTLY参数,这是个好习惯,达到与写入数据并发的效果,列存表、分区表和临时表不支持CONCURRENTLY方式创建索引。 -- 建议对于频繁update、delete的包含于index 定义中的column的table,用create index CONCURRENTLY , drop index CONCURRENTLY的方式进行维护其对应index。 -- 建议用unique index 代替unique constraints便于后续维护。 -- 建议对where 中带多个字段and条件的高频 query,参考数据分布情况,建多个字段的联合index。 -- 每个表的index数量不能超过5个。 -- 复合索引的建立需要进行仔细分析: - - 正确选择复合索引中的第一个字段,一般是选择性较好的且在where子句中常用的字段上; - - 复合索引的几个字段是否经常同时以AND方式出现在Where子句中?单字段查询是否极少甚至没有?如果是,则可以建立复合索引;否则考虑单字段索引; - - 如果复合索引中包含的字段经常单独出现在Where子句中,则分解为多个单字段索引; - - 如果既有单字段索引,又有以这个字段为首列的复合索引,一般可考虑删除单字段索引; - - 复合索引第一个字段一般不使用时间字段,因为时间字段多用于范围扫描,而前面的字段使用范围扫描后,后续字段无法用于索引过滤。 - - 复合索引字段个数不能超过4个。 - -- 频繁DML(写次数明显超过读次数)的表,不要建立太多的索引。 - +- 频繁DML操作的表索引数量不建议超过5个。 +- 
create/drop index时添加concurrently参数。 +- 真正创建索引前可以使用虚拟索引确定索引的有效性。 +- 经常出现在关键字order by、group by、distinct后面的字段,建立索引。 +- 经常用作查询选择的字段,建立索引。 +- 经常用作表连接的属性上,建立索引。 +- 复合索引的字段数不建议超过3个。 +- 复合索引第一个字段是常用检索条件。 +- 复合索引第一个字段不应存在单字段索引。 +- 对数据很少被更新的表,经常只查询其中的几个字段,考虑使用索引覆盖。 +- 不要在有大量相同取值的字段上建立索引。 +- 建议用unique index代替unique constraints便于后续维护。 +- 建议对where中带多个字段and条件的高频query,参考数据分布情况,建多个字段的复合index。 - 无用的索引以及重复索引应删除,避免对执行计划及数据库性能造成负面影响。 ### view设计 @@ -457,133 +464,83 @@ create unique index CONCURRENTLY ON test (id); ### 关于NULL -- 说明:NULL 的判断:IS NULL ,IS NOT NULL。 - -- 说明:注意boolean 类型取值 true,false,NULL。 - -- 说明:小心NOT IN 集合中带有NULL元素。 - -```sql -mydb=# SELECT * FROM (VALUES(1),(2)) v(a) ; a - -\--- - - 1 - - 2 - -(2 rows) - -mydb=# select 1 NOT IN (1,NULL); - -?column? - -\--------- - -f - -(1 row) - -mydb=# select 2 NOT IN (1,NULL); - -?column? - -\--------- - -(1 row) - -mydb=# SELECT * FROM (VALUES(1),(2)) v(a) WHERE a NOT IN (1, NULL); a - -\--- - -(0 rows) -``` - -- 建议:使用count(1) 或count(*) 来统计行数,而不建议使用count(col) 来统计行数,因为NULL值不会计入。 - -- 规则:count(多列列名)时,多列列名必须使用括号,例如count( (col1,col2,col3) )。 - -- 注意:多列的count,即使所有列都为NULL,该行也被计数,所以效果与count(*) 一致。 - -- 注意:count(distinct col) 计算某列的非NULL不重复数量,NULL不被计数 - - count(distinct (col1,col2,...) ) 计算多列的唯一值时,NULL会被计数,同时NULL与NULL会被认为是相同的。 - -- 注意:NULL 的count与sum - -```sql -select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; - -count | count | sum - --------+-------+----- - - 1 | 0 | - -(1 row) -``` - -- 判断两个值是否相同(将NULL视为相同的值) - -```sql -select null is distinct from null; - -?column? - -\--------- - -f - -(1 row) - -select null is distinct from 1; - -?column? - -\--------- - -t - -(1 row) - -select null is not distinct from null; - -?column? - -\--------- - -t - -(1 row) - -select null is not distinct from 1; - -?column? 
- -\--------- - -f - -(1 row) -``` +- 说明:`NULL`的判断:`IS NULL`、`IS NOT NULL`。 + +- 说明:注意`boolean`类型取值`true`、`false`、`NULL`。 + +- 说明:小心`NOT IN`集合中带有`NULL`元素。 + +- 建议:使用`count(1)`或`count(*)`来统计行数,而不建议使用`count(col)`来统计行数,因为`NULL`值不会计入。 + +- 规则:`count(多列列名)`时,多列列名必须使用括号,例如`count( (col1,col2,col3) )`。 + +- 注意:多列的`count`,即使所有列都为`NULL`,该行也被计数,所以效果与`count(*)`一致。 + +- 注意:`count(distinct col)`计算某列的非`NULL`不重复数量,`NULL`不被计数;`count(distinct (col1,col2,...) )`计算多列的唯一值时,`NULL`会被计数,同时`NULL`与`NULL`会被认为是相同的。 + +- 注意:`NULL`的`count`与`sum` + + ```sql + select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; + count | count | sum + -------+-------+----- + 1 | 0 | + (1 row) + ``` + +- 判断两个值是否相同(将`NULL`视为相同的值) + + ```sql + select null is distinct from null; + ?column? + \--------- + f + (1 row) + + select null is distinct from 1; + ?column? + \--------- + t + (1 row) + + select null is not distinct from null; + ?column? + \--------- + t + (1 row) + + select null is not distinct from 1; + ?column? 
+ \--------- + f + (1 row) + ``` ### 关于索引失效 - 在书写SQL语句时经常会在查询中使用函数及表达式,建议尽量不要在条件列上使用函数及表达式。在条件列上使用函数或者表达式的时候会导致使用不上该条件列上的索引,从而影响SQL的查询效率。尽量把函数或者表达式用在条件值上,避免使用在条件列上。示例: - `select name from tab where id+100>1000;` + ```sql + select name from tab where id+100>1000; + ``` 可以改写为如下形式: - `select name from tab where id>1000-100;` + ```sql + select name from tab where id>1000-100; + ``` - 查询语句中尽量不要使用左模糊查询。示例: - `select id from tab where name like '%ly';` + ```sql + select id from tab where name like '%ly'; + ``` -- 查询中尽量不要使用负向查询,如not in/like,示例: +- 查询中尽量不要使用负向查询,如`not in/like`,示例: - `select id from tab where name not in ('ly','ty');` + ```sql + select id from tab where name not in ('ly','ty'); + ``` ### 确保使用到所有变量和参数 @@ -593,47 +550,65 @@ f ### DDL操作 -- DB object 尤其是COLUMN 加COMMENT,便于后续新人了解业务及维护 +- 业务高峰期禁止对已存在的表执行任何DDL操作 -- 发给DBA 发布的DDL,附带常用SQL: SELECT, INSERT ,DELETE, UPDATE,便于DBA给出create index CONCURRENTLY等其他优化建议 +- 所有生产DDL操作必须经过开发测试环境验证 -- 向大size的table中add column时,将 alter table t add column col datatype not null default xxx;分解为如下,避免填充default值导致的过长时间锁表 +- 维护索引时应采用concurrently的方式 -```sql -alter table t add column col datatype ; +- 应该使用pg_repack替换vacuum full来重建表 -alter table t alter column col set default xxx; +- 大表添加带默认值的字段时,应拆分为:添加字段、填补默认值及添加非空约束三部分,如将`alter table t add column col datatype not null default xxx;`分解为如下,避免填充`default`值导致的过长时间锁表 -update table t set column= DEFAULT where id in ( select id from t where column is null limit + ```sql + alter table t add column col datatype ; + alter table t alter column col set default xxx; + update table t set column= DEFAULT where id in ( select id from t where column is null limit + 1000 ) ; \watch 3 + alter table t alter column col set not null; + ``` -1000 ) ; \watch 3 +### DML操作 -alter table t alter column col set not null; -``` +- 更新数据的SQL语句禁止出现`where 1=1` -### DML操作 +- 单条DML语句操作的数据量不超过10万 + +- 清空表中的数据时,应使用`truncate` + +- 对于风险性较高的操作,应该显示的开启事务,确认无误后在提交 + +- 事务中SQL逻辑尽量简单,操作执行完后要及时提交,避免`idle in transaction`状态 -- update 时做 <> 
判断,比如update table_a set column_b = c where column_b <> c,禁止出现where 1=1 +- 大批数据入库时应使用`copy`替换`insert` -- 单条DML语句操作数据量不超过10万 +- 数据导入前考虑先删除索引,导入完成后重建 -- 建议清空表时,使用truncate,不建议使用delete ### DQL操作 -- 非必须时禁止使用select *,只取所需字段,以减少包括不限于网络带宽消耗,避免表结构变更对程序的影响(比如某些prepare query) +- 禁止使用`select *`,应用具体所需字段替换 -- 对报表类的或生成基础数据的查询,使用物化视图(MATERIALIZED VIEW)定期固化数据快照,避免对多表(尤其多写频繁的表)重复跑相同的查询 +- 禁止使用`where 1=1`,避免全表扫描或笛卡尔积 -- 复杂的统计查询可以尝试窗口函数 Window Functions +- 检索条件值应该与字段类型保持一致,防止不走索引 -- 避免关联字段数据类型不一致,禁止使用隐式类型转换 +- 等号左边的字段应该与索引保持一致,尤其是条件索引或函数索引 + +- 关注慢SQL的执行计划,如与预期不一致,尽快修改 + +- 使用`count(*)`或`count(1)`来统计行数,`count(column)`不会统计`null`行 + +- 限制`join`的数量,不建议超过3个 + +- 递归查询需要做好限制,防止无限循环 + +- 对于or运算,应该使用`union all`或`union`替换 -- 不同字段的or语句使用union代替 ### 数据导入 -- 建议大批量的数据入库时,使用copy,不建议使用insert,以提高写入速度 +- 建议大批量的数据入库时,使用`copy`,不建议使用`insert`,以提高写入速度 - 导入数据前需要先删除相关索引,导入完成后重建,提高数据导入速度 @@ -643,13 +618,13 @@ alter table t alter column col set not null; - 执行CRAETE、DROP、ALTER等DDL操作,尤其多条,不要显式的开transaction,因为加lock的mode非常高,极易产生deadlock -- state 为 idle in transaction 的连接,如果出现在Master,会无谓的lock住相应的资源,可导致后续产生lock,甚至deadlock; 出现在Slave,可导致卡住主从同步 +- state为`idle in transaction`的连接,如果出现在Master,会无谓的lock住相应的资源,可导致后续产生lock,甚至deadlock;出现在Slave,可导致卡住主从同步 ### 其他 -- 建议运行在SSD上的实例,random_page_cost (默认值为4) 设置为1.0~2.0之间,使查询规划器更倾向于使用索引扫描 +- 建议运行在SSD上的实例,random_page_cost(默认值为4)设置为1.0~2.0之间,使查询规划器更倾向于使用索引扫描 -- 建议在需要使用explain analyze查看实际真正执行计划与时间时,如果是写入query,强烈建议先开启事务,然后回滚。 +- 建议在需要使用`explain analyze`查看实际真正执行计划与时间时,如果是写入query,强烈建议先开启事务,然后回滚。 - 对于频繁更新,膨胀率较高的表,应找窗口期执行表重组,降低高水位 @@ -657,83 +632,85 @@ alter table t alter column col set not null; ### 建库规范 -MogDB/openGauss创建数据库使用PG兼容模式如下: +MogDB创建数据库使用PG兼容模式方法如下: +```sql create database dbnam DBCOMPATIBILITY='PG' encoding=’utf8’; +``` ### 数据类型 #### 数值类型 -在开发使用中,MogDB/openGauss只允许使用smallint、integer、bigint、numeric[(p[,s])]、serial、bigserial 这几种类型。 - -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | -| ---------------- | ---------- | --------- | ------------ | 
------------------------------------------------------- | -| tinyint | / | 支持 | 1字节 | 0 ~ 255 | -| smallint | 支持 | 支持 | 2字节 | -32,768 ~ +32,767 | -| integer | 支持 | 支持 | 4字节 | -2,147,483,648 ~ +2,147,483,647 | -| binary_integer | / | 支持 | / | integer别名 | -| bigint | 支持 | 支持 | 8字节 | -9,223,372,036,854,775,808 ~ +9,223,372,036,854,775,807 | -| decimal[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | -| numeric[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | -| number[(p[,s])] | / | 支持 | / | numeric别名 | -| real | 支持 | 支持 | 4字节 | 6位十进制数字精度 | -| float4 | / | 支持 | 4字节 | 6位十进制数字精度 | -| double precision | 支持 | 支持 | 8字节 | 15位十进制数字精度 | -| binary_double | / | 支持 | 8字节 | double precision别名 | -| float8 | / | 支持 | 8字节 | 15位十进制数字精度 | -| float[(p )] | / | 支持 | 4字节或8字节 | | -| dec[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | -| integer[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | -| smallserial | 支持 | 支持 | 2字节 | 1 ~ 32,767 | -| serial | 支持 | 支持 | 4字节 | 1 ~ 2,147,483,647 | -| bigserial | 支持 | 支持 | 8字节 | 1 ~ 9,223,372,036,854,775,807 | -| tinyint | / | 支持 | 1字节 | 0 ~ 255 | +在开发使用中,MogDB只允许使用smallint、integer、bigint、numeric[(p[,s])]、serial、bigserial这几种类型。 + +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | +| ---------------- | ---------- | ----- | ------------ | ------------------------------------------------------- | +| tinyint | / | 支持 | 1字节 | 0 ~ 255 | +| smallint | 支持 | 支持 | 2字节 | -32,768 ~ +32,767 | +| integer | 支持 | 支持 | 4字节 | -2,147,483,648 ~ +2,147,483,647 | +| binary_integer | / | 支持 | / | integer别名 | +| bigint | 支持 | 支持 | 8字节 | -9,223,372,036,854,775,808 ~ +9,223,372,036,854,775,807 | +| decimal[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | +| numeric[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | +| number[(p[,s])] | / | 支持 | / | numeric别名 | +| real | 支持 | 支持 | 4字节 | 6位十进制数字精度 | +| float4 | / | 支持 | 4字节 | 6位十进制数字精度 | +| double precision | 支持 | 支持 | 8字节 | 15位十进制数字精度 | +| binary_double | / | 支持 | 8字节 | double precision别名 | +| float8 
| / | 支持 | 8字节 | 15位十进制数字精度 | +| float[(p )] | / | 支持 | 4字节或8字节 | | +| dec[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | +| integer[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | +| smallserial | 支持 | 支持 | 2字节 | 1 ~ 32,767 | +| serial | 支持 | 支持 | 4字节 | 1 ~ 2,147,483,647 | +| bigserial | 支持 | 支持 | 8字节 | 1 ~ 9,223,372,036,854,775,807 | +| tinyint | / | 支持 | 1字节 | 0 ~ 255 | #### 字符类型 -在开发使用中,MogDB/openGauss只允许使用char(n)、varchar(n)、text字符类型。 +在开发使用中,MogDB只允许使用char(n)、varchar(n)、text字符类型。 -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | -| ------------ | ---------- | --------- | ----------------------------- | ------------------------------------------------------------ | -| char(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | -| nchar(n) | / | 支持 | 最大为10MB | n指字节数量,兼容pg模式配置下n代表字符数量 | -| varchar(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | -| varchar2(n) | / | 支持 | 最大为10MB | varchar(n)别名 | -| nvarchar2(n) | / | 支持 | 最大为10MB | n指字符数量 | -| text | 支持 | 支持 | 1GB - 1 | | -| clob | / | 支持 | 1GB - 1 | text别名 | +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | +| ------------ | ---------- | ----- | ----------------------------- | ------------------------------------------------------------ | +| char(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | +| nchar(n) | / | 支持 | 最大为10MB | n指字节数量,兼容pg模式配置下n代表字符数量 | +| varchar(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | +| varchar2(n) | / | 支持 | 最大为10MB | varchar(n)别名 | +| nvarchar2(n) | / | 支持 | 最大为10MB | n指字符数量 | +| text | 支持 | 支持 | 1GB - 1 | | +| clob | / | 支持 | 1GB - 1 | text别名 | #### 时间类型 -在开发使用中,MogDB/openGauss只允许使用timestamp[(p )][with time zone]、date日期类型。 +在开发使用中,MogDB只允许使用`timestamp[(p)][with time zone]`、`date`期类型。 -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | | ----------------------------------- | ---------- | --------- | -------- | 
------------------------------------------------------- | | timestamp[(p )][without time zone] | 支持 | 支持 | 8字节 | 4713 BC - 294276 AD | -| timestamp[(p )][with time zone] | 支持 | 支持 | 8字节 | 4713 BC - 294276 AD | +| timestamp[(p )][with time zone] | 支持 | 支持 | 8字节 | 4713 BC - 294276 AD | | date | 支持 | 支持 | 4字节 | 4713 BC - 5874897 AD (og实际存储空间大小为8字节) | -| time[(p )][without time zone] | 支持 | 支持 | 8字节 | 00:00:00 - 24:00:00 | -| time[(p )][with time zone] | 支持 | 支持 | 12字节 | 00:00:00+1459 - 24:00:00-1459 | -| interval[fields][(p )] | 支持 | 支持 | 16字节 | -178000000年 - 178000000年 | +| time[(p )][without time zone] | 支持 | 支持 | 8字节 | 00:00:00 - 24:00:00 | +| time[(p )][with time zone] | 支持 | 支持 | 12字节 | 00:00:00+1459 - 24:00:00-1459 | +| interval[fields][(p )] | 支持 | 支持 | 16字节 | -178000000年 - 178000000年 | | smalldatetime | / | 支持 | 8字节 | 日期和时间,不带时区,精确到分钟,秒位大于等于30秒进一位 | -| interval day(1) to second(p ) | / | 支持 | 16字节 | | +| interval day(1) to second(p) | / | 支持 | 16字节 | | | reltime | / | 支持 | 4字节 | | #### json类型 -MogDB/openGauss只允许使用json类型。 +MogDB只允许使用json类型。 -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | -| ----- | ---------- | --------- | -------- | ---- | -| json | 支持 | 支持 | / | | -| jsonb | 支持 | / | / | | +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | +| ----- | ---------- | ----------- | -------- | ---- | +| json | 支持 | 支持 | / | | +| jsonb | 支持 | 2.1开始支持 | / | | ### 关键字 -PostgreSQL与openGauss数据库中关键字差异化说明,保留是指数据库保留关键字,不允许自定义使用;非保留或空是指可以自定义使用,MogDB/openGauss保留字详情,请参考附件。 +PostgreSQL与MogDB数据库中关键字差异化说明,保留是指数据库保留关键字,不允许自定义使用;非保留或空是指可以自定义使用,MogDB保留字详情,请参考[此页面](2-keywords)。 -| 关键字 | MogDB/openGauss | PostgreSQL | +| 关键字 | MogDB | PostgreSQL | | ------------- | ------------------------ | ------------------------ | | AUTHID | 保留 | #N/A | | BUCKETS | 保留 | #N/A | @@ -760,18 +737,19 @@ PostgreSQL与openGauss数据库中关键字差异化说明,保留是指数据 | SYSDATE | 保留 | #N/A | | VERIFY | 保留 | #N/A | -### 隐式转换对应表 - -| input_type | target_type | MogDB/openGauss | -| ----------- | 
---------------------------------------------------------- | --------------- | -| bool | int2、int4、int8 | 支持 | -| int2 | bool、text、varchar、interval | 支持 | -| int4 | bool、int2、text、varchar、interval | 支持 | -| int8 | bool、text、varchar | 支持 | -| text | int8、int4、int2、float4、float8、date、timestamp、nemeric | 支持 | -| float4 | int8、int4、int2、text、varchar | 支持 | -| float8 | int8、int4、int2、text、float4、varchar、interval、numeric | 支持 | -| date | text、varchar | 支持 | -| timestamp | text、varchar | 支持 | -| timestamptz | text | 支持 | -| numeric | int8、int4、int2、text、varchar、interval | 支持 | +### 隐式转换异同 + +| input_type | target_type | MogDB | PG | +| ----------- | ------------------------------------------------------------ | ---------------- | -------------------------------------------------------- | +| bool | int2、int4、int8 | 支持 | int4不支持,其它无 | +| int2 | bool、text、bpchar、varchar、interval | 支持(bpchar无) | 无 | +| int4 | bool、int2、text、bpchar、varchar、interval | 支持(bpchar无) | bool不支持,int2为in assignment,其它无 | +| int8 | bool、text、bpchar、varchar | 支持(bpchar无) | 无 | +| text | int8、int4、int2、float4、float8、date、timestamp、nemeric | 支持 | 无 | +| float4 | int8、int4、int2、text、bpchar、varchar | 支持(bpchar无) | 前三in assignment,后无 | +| float8 | int8、int4、int2、text、float4、bpchar、varchar、interval、numeric | 支持(bpchar无) | int8、int4、int2、float4、numeric为in assignment,其它无 | +| bpchar | int8、int4、date、timestamp、numeric | | | +| date | text、bpchar、varchar | 支持(bpchar无) | 无 | +| timestamp | text、varchar | 支持 | 无 | +| timestamptz | text | 支持 | 无 | +| numeric | int8、int4、int2、text、bpchar、varchar、interval | 支持(bpchar无) | 前三in assignment,后无 | diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md b/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md index 1bb81b51..2c2b12ec 100644 --- a/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md +++ b/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md @@ 
-23,78 +23,63 @@ date: 2021-04-27 本规范强调实用性、可操作性,根据开发人员在编码过程中常见的问题和易犯的错误,对代码编写的各方面均进行了详细明确的规范和约束。主要包括下列内容: - 命名规范 - - 设计规范 - - 语法规范 - - 优化相关 - - PG兼容性 -- 常用函数 - 除此之外,对规范的每条细则均给出具体的范例。 ### 适用范围 -本规范适用于MogDB/openGauss 1.0.0及以上版本。 +本规范适用于MogDB 1.0.0及以上版本。 ## 命名规范 ### 对象命名统一规范 -数据库对象,如database, schema, table, column, view, index, constraint,sequence, function, trigger等命名统一标准如下: +数据库对象,如database、schema、table、column、view、index、constraint、sequence、function、trigger等命名统一标准如下: -- 建议使用小写字母、数字、下划线的组合 +- 长度不能超过63个字符 - 命名尽量采用富有意义英文词汇 -- 建议不使用双引号即"包围,除非必须包含大写字母或空格等特殊字符 +- 建议使用小写字母、数字、下划线的组合 -- 长度不能超过63个字符 +- 建议不使用双引号即"包围,除非必须包含大写字母或空格等特殊字符 - 不建议以PG、GS开头(避免与系统DB object混淆),不建议以数字开头 -- 禁止使用保留字,保留关键字参考官方文档。 +- 禁止使用[保留字](2-keywords),保留关键字参考官方文档 + +- table能包含的column数目,根据字段类型的不同,数目在250到1600之间 -- table能包含的column数目,根据字段类型的不同,数目在 250 到 1600 之间 ### 临时及备份对象命名 -- 临时或备份的数据库对象名,如table,建议添加日期,如dba.trade_record_2020_12_08 (其中dba 为DBA专用schema,trade_record为表名,2020_12_08为备份日期)。 +- 临时或备份的数据库对象名,如table,建议添加日期,如`dba.trade_record_1970_01_01`(其中dba 为DBA专用schema,trade_record为表名,1970_01_01为备份日期)。 ### 表空间命名 -- 数据库的用户表空间用ts_<表空间名>来表现,其中,表空间名分为: - 1. 数据空间:对于用户的缺省表空间,用default来表现。对于其他的表空间,根据寄存在表空间上的表的类别来表现。如放代码的表,用code来表现。放客户资料的表,用customer来表现。尽量用一个表空间来寄存该类的表。如果某表特殊大,可考虑单独使用一个表空间。 - 2. 索引空间:在相应的数据表空间的名字前加idx_。如对用户缺省表空间的索引空间,用ts_idx_default来表现。对代码表的索引表空间,用ts_idx_code来表现。 - -- 表空间名禁止以"PG_"开头。 +- 数据库的用户表空间用`ts_<表空间名>`来表现,其中,表空间名分为: + 1. 数据空间:对于用户的缺省表空间,用`default`来表现。对于其他的表空间,根据寄存在表空间上的表的类别来表现。如放代码的表,用`code`来表现。放客户资料的表,用`customer`来表现。尽量用一个表空间来寄存该类的表。如果某表特殊大,可考虑单独使用一个表空间。 + 2. 
索引空间:在相应的数据表空间的名字前加idx_。如对用户缺省表空间的索引空间,用`ts_idx_default`来表现。对代码表的索引表空间,用`ts_idx_code`来表现。 ### 索引命名 -- Index对象命名规则为: 表名_列名_idx,如student_name_idx,该索引命名方式为MogDB/openGauss数据库在创建索引时没有明确指定索引名称时的默认命名方式。 +- Index对象命名规则为:`表名_列名_idx`,如`student_name_idx`,该索引命名方式为MogDB数据库在创建索引时没有明确指定索引名称时的默认命名方式。 - 因此建议创建索引时,不显式给出index name,使用DBMS默认值。 + 因此建议创建索引时,不显式给出index name,使用DBMS默认值。 ```sql create unique index on departments(department_id); - CREATE INDEX - \di - +----------+-------------------------------+--------+---------+ - | Schema | Name | Type | Owner | - |----------+-------------------------------+--------+---------| - | mogdb | departments_department_id_idx | index | mogdb | - +----------+-------------------------------+--------+---------+ - SELECT 1 ``` @@ -108,7 +93,7 @@ SELECT 1 - 分区表的表名遵循普通表的正常命名规则。 -- 按时间范围分区(每月一个分区),分区名字为PART_YYYYMM。 +- 按时间范围分区(每月一个分区),分区名字为`PART_YYYYMM`。 举例: PART_201901、PART_201902 @@ -116,32 +101,40 @@ SELECT 1 - 名称应与其实际功能保持一致。导致发生某动作应以动词为前缀命令。 -示例:以下命名符合规范: + 示例:以下命名符合规范: -``` -func_addgroups(增加多个群组) -func_addgroup(增加一个群组) -``` + ``` + func_addgroups(增加多个群组) + func_addgroup(增加一个群组) + ``` ## 设计规范 ### database设计 -- 数据库优先使用兼容PG类型的方式来创建。 +- 建议以业务功能来命名数据库,简单直观。 -- 数据库编码只能用utf8。 +- 业务数据库推荐以兼容PG的方式来创建。 + +- 数据库编码推荐用utf8。 ### tablespace设计 -- 一般较大的表或索引单独分配一个tablespace。 +- 频繁使用的表和索引单独存放在一个表空间,此表空间应在性能好的磁盘上创建。 + +- 以历史数据为主,或活跃度较低的表和索引可以存放在磁盘性能较差的表空间。 -- 高频率insert的对象分成一组,存在对应的tablespace中。 +- 表和索引可以单独存放在不同的表空间。 -- 增、删、改的对象分成一组,存在对应的tablespace中。 +- 表空间也可以按数据库分、按schema分或按业务功能来分。 -- 表和索引分别存于不同的tablespace。 +- 每个database/schema对应一个表空间和一个相应索引表空间。 -- 原则上每个schema对应一个表空间和一个相应索引表空间;每个schema下的大表对应一个单独的表空间和索引表空间。 +### schema设计 + +- 在一个数据库下执行创建用户时,默认会在该数据库下创建一个同名schema。 +- 不建议在默认public schema下创建数据库对象。 +- 创建一个与用户名不同的schema给业务使用。 ### table设计 @@ -149,26 +142,29 @@ func_addgroup(增加一个群组) - 必须为表添加注释信息,表名与注释信息相匹配。 -- 禁止使用unlogged关键字新建表,默认创建非压缩行表。 - -- 每个表在创建时候,必须指定所在的表空间,不要采用默认表空间以防止表建立在系统表空间上导致性能问题。对于事务比较繁忙的数据表,必须存放在专用表空间中。 +- 禁止使用unlogged/ temp/temporary关键字创建业务表。 - 
作为表间连接关系的字段,数据类型必须保持严格一致,避免索引无法正常使用。 - 禁止使用VARCHAR或其他字符类型来存储日期值,如果使用,则不能在此字段上做运算,需要在数据规范中严格定义。 -- 字段必须添加能够清楚表示其含义的注释,状态类字段的注释中必须明确列出各状态值的说明。 +- 对于频繁更新的astore表,建议建表时指定表的fillfactor=85,给HOT预留空间。 + +- 频繁更新使用的表应该单独放在存储性能好的表空间。 -- 对于频繁更新的表,建议建表时指定表的fillfactor=85,每页预留15%的空间给HOT更新使用。 +- 数据量超过亿级或占用磁盘超过10GB的表,建议考虑分区。 - 表结构中字段定义的数据类型与应用程序中的定义保持一致,表之间字段校对规则一致,避免报错或无法使用索引的情况发生。 - 说明:比如A表user_id字段数据类型定义为varchar,但是SQL语句查询为 where user_id=1234; + > 说明:比如A表user_id字段数据类型定义为varchar,但是SQL语句查询为`where user_id=1234;` + ### partition table设计 - 分区表的个数不建议超过1000个。 +- 分区表可以按使用频度选择不同的表空间。 + - 主键或唯一索引必须要包含分区键。 - 对于数据量比较大的表,根据表数据的属性进行分区,以得到较好的性能。 @@ -360,26 +356,38 @@ DROP TABLESPACE ### column设计 -- 建议可以采用数值类型的场合,则避免采用字符类型。 +- 避免与系统表的列名重复。 + +- 字段含义及数据类型要与程序代码设计保持一致。 + +- 所有字段必须要添加comment注释信息。 -- 建议可以采用varchar(N) 就避免采用char(N),可以采用varchar(N) 就避免采用text,varchar。 +- 能使用数值类型,就不要使用字符类型。 -- 只允许用char(N)、varchar(N)及text字符类型。 +- 禁止用字符类型存储日期数据。 -- MogDB/openGauss新建数据库默认兼容oracle,not null 约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 +- 时间类型字段统一使用timestamptz。 -- 建议使用timestamp with time zone(timestamptz),而不用timestamp without time zone。 +- 字段尽量要求not null,为字段提供默认值。 + +- MogDB新建数据库默认兼容oracle,not null约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 -- 建议使用NUMERIC(precision, scale)来存储货币金额和其它要求精确计算的数值, 而不建议使用real, double precision。 ### 序列设计 -- 禁止手动添加与表相关的序列。 +- 禁止手动创建与表相关的序列,应指定serial/bingserial类型方式创建。 + +- 序列的步长建议设置为1。 + +- 不建议设置minvalue和maxvalue。 -- 建表时指定列的serial或bigserial类型的方式来创建序列。 +- 不建议设置cache,设置cache后序列号不连续。 + +- 禁止开启cycle。 - 序列应与代码中变量定义类型及范围一致,防止无法插入数据。 + ### constraint设计 #### 主键约束 @@ -388,14 +396,14 @@ DROP TABLESPACE - 建议不要用有业务含义的名称作为主键,比如身份证或者国家名称,尽管其是unique的。 -- 建议主键的一步到位的写法:id serial primary key 或id bigserial primary key。 +- 建议主键的一步到位的写法:`id serial primary key`或`id bigserial primary key`。 - 建议内容系统中size较大的table主键的等效写法如下,便于后续维护。 -```sql -create table test(id serial not null ); -create unique index CONCURRENTLY ON test (id); -``` + ```sql + create table test(id serial not null ); + create unique index 
CONCURRENTLY ON test (id); + ``` #### 唯一约束 @@ -405,34 +413,33 @@ create unique index CONCURRENTLY ON test (id); - 存在外键关系的表上尽量创建外键约束。 -- 使用外键时,一定要设置fk的action,例如cascade,set null,set default。 +- 性能要求高而安全性自己控制的系统不建议使用外键。 + +- 使用外键时,一定要设置fk的action,例如cascade、set null、set default。 #### 非空列 -- 所有非空列须在建表之初明确标识“NOT NULL”,上线之后,不再变更。同时需注意 NULL 与空字符‘’的查询结果差别:null 值会转换为 NULL,而空值无字符显示。 +- 所有非空列必须添加not null约束 #### 检查约束 -- 对于字段有检查性约束,一般要求指定check规则。例如:性别、状态等字段。 +- 对于字段有检查性约束,一般要求指定check规则。例如:性别、状态等字段。 ### index设计 -- MogDB/openGauss 提供的index类型: 行存表支持的索引类型:btree(行存表缺省值)、gin、gist。列存表支持的索引类型:Psort(列存表缺省值)、btree、gin。 -- 建议create 或 drop index 时,加 CONCURRENTLY参数,这是个好习惯,达到与写入数据并发的效果,列存表、分区表和临时表不支持CONCURRENTLY方式创建索引。 -- 建议对于频繁update、delete的包含于index 定义中的column的table,用create index CONCURRENTLY , drop index CONCURRENTLY的方式进行维护其对应index。 -- 建议用unique index 代替unique constraints便于后续维护。 -- 建议对where 中带多个字段and条件的高频 query,参考数据分布情况,建多个字段的联合index。 -- 每个表的index数量不能超过5个。 -- 复合索引的建立需要进行仔细分析: - - 正确选择复合索引中的第一个字段,一般是选择性较好的且在where子句中常用的字段上; - - 复合索引的几个字段是否经常同时以AND方式出现在Where子句中?单字段查询是否极少甚至没有?如果是,则可以建立复合索引;否则考虑单字段索引; - - 如果复合索引中包含的字段经常单独出现在Where子句中,则分解为多个单字段索引; - - 如果既有单字段索引,又有以这个字段为首列的复合索引,一般可考虑删除单字段索引; - - 复合索引第一个字段一般不使用时间字段,因为时间字段多用于范围扫描,而前面的字段使用范围扫描后,后续字段无法用于索引过滤。 - - 复合索引字段个数不能超过4个。 - -- 频繁DML(写次数明显超过读次数)的表,不要建立太多的索引。 - +- 频繁DML操作的表索引数量不建议超过5个。 +- create/drop index时添加concurrently参数。 +- 真正创建索引前可以使用虚拟索引确定索引的有效性。 +- 经常出现在关键字order by、group by、distinct后面的字段,建立索引。 +- 经常用作查询选择的字段,建立索引。 +- 经常用作表连接的属性上,建立索引。 +- 复合索引的字段数不建议超过3个。 +- 复合索引得一个字段是常用检索条件。 +- 复合索引第一个字段不应存在单字段索引。 +- 对数据很少被更新的表,经常只查询其中的几个字段,考虑使用索引覆盖。 +- 不要在有大量相同取值的字段上建立索引。 +- 建议用unique index代替unique constraints便于后续维护。 +- 建议对where中带多个字段and条件的高频query,参考数据分布情况,建多个字段的复合index。 - 无用的索引以及重复索引应删除,避免对执行计划及数据库性能造成负面影响。 ### view设计 @@ -457,133 +464,83 @@ create unique index CONCURRENTLY ON test (id); ### 关于NULL -- 说明:NULL 的判断:IS NULL ,IS NOT NULL。 - -- 说明:注意boolean 类型取值 true,false,NULL。 - -- 说明:小心NOT IN 集合中带有NULL元素。 - -```sql -mydb=# SELECT * FROM (VALUES(1),(2)) v(a) ; 
a - -\--- - - 1 - - 2 - -(2 rows) - -mydb=# select 1 NOT IN (1,NULL); - -?column? - -\--------- - -f - -(1 row) - -mydb=# select 2 NOT IN (1,NULL); - -?column? - -\--------- - -(1 row) - -mydb=# SELECT * FROM (VALUES(1),(2)) v(a) WHERE a NOT IN (1, NULL); a - -\--- - -(0 rows) -``` - -- 建议:使用count(1) 或count(*) 来统计行数,而不建议使用count(col) 来统计行数,因为NULL值不会计入。 - -- 规则:count(多列列名)时,多列列名必须使用括号,例如count( (col1,col2,col3) )。 - -- 注意:多列的count,即使所有列都为NULL,该行也被计数,所以效果与count(*) 一致。 - -- 注意:count(distinct col) 计算某列的非NULL不重复数量,NULL不被计数 - - count(distinct (col1,col2,...) ) 计算多列的唯一值时,NULL会被计数,同时NULL与NULL会被认为是相同的。 - -- 注意:NULL 的count与sum - -```sql -select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; - -count | count | sum - --------+-------+----- - - 1 | 0 | - -(1 row) -``` - -- 判断两个值是否相同(将NULL视为相同的值) - -```sql -select null is distinct from null; - -?column? - -\--------- - -f - -(1 row) - -select null is distinct from 1; - -?column? - -\--------- - -t - -(1 row) - -select null is not distinct from null; - -?column? - -\--------- - -t - -(1 row) - -select null is not distinct from 1; - -?column? - -\--------- - -f - -(1 row) -``` +- 说明:`NULL`的判断:`IS NULL`、`IS NOT NULL`。 + +- 说明:注意`boolean`类型取值`true`、`false`、`NULL`。 + +- 说明:小心`NOT IN`集合中带有`NULL`元素。 + +- 建议:使用`count(1)`或`count(*)`来统计行数,而不建议使用`count(col)`来统计行数,因为`NULL`值不会计入。 + +- 规则:`count(多列列名)`时,多列列名必须使用括号,例如`count( (col1,col2,col3) )`。 + +- 注意:多列的`count`,即使所有列都为`NULL`,该行也被计数,所以效果与`count(*)`一致。 + +- 注意:`count(distinct col)`计算某列的非`NULL`不重复数量,`NULL`不被计数;`count(distinct (col1,col2,...) )`计算多列的唯一值时,`NULL`会被计数,同时`NULL`与`NULL`会被认为是相同的。 + +- 注意:`NULL`的`count`与`sum` + + ```sql + select count(1), count(a), sum(a) from (SELECT * FROM (VALUES (NULL), (2) ) v(a)) as foo where a is NULL; + count | count | sum + -------+-------+----- + 1 | 0 | + (1 row) + ``` + +- 判断两个值是否相同(将`NULL`视为相同的值) + + ```sql + select null is distinct from null; + ?column? 
+ \--------- + f + (1 row) + + select null is distinct from 1; + ?column? + \--------- + t + (1 row) + + select null is not distinct from null; + ?column? + \--------- + t + (1 row) + + select null is not distinct from 1; + ?column? + \--------- + f + (1 row) + ``` ### 关于索引失效 - 在书写SQL语句时经常会在查询中使用函数及表达式,建议尽量不要在条件列上使用函数及表达式。在条件列上使用函数或者表达式的时候会导致使用不上该条件列上的索引,从而影响SQL的查询效率。尽量把函数或者表达式用在条件值上,避免使用在条件列上。示例: - `select name from tab where id+100>1000;` + ```sql + select name from tab where id+100>1000; + ``` 可以改写为如下形式: - `select name from tab where id>1000-100;` + ```sql + select name from tab where id>1000-100; + ``` - 查询语句中尽量不要使用左模糊查询。示例: - `select id from tab where name like '%ly';` + ```sql + select id from tab where name like '%ly'; + ``` -- 查询中尽量不要使用负向查询,如not in/like,示例: +- 查询中尽量不要使用负向查询,如`not in/like`,示例: - `select id from tab where name not in ('ly','ty');` + ```sql + select id from tab where name not in ('ly','ty'); + ``` ### 确保使用到所有变量和参数 @@ -593,47 +550,65 @@ f ### DDL操作 -- DB object 尤其是COLUMN 加COMMENT,便于后续新人了解业务及维护 +- 业务高峰期禁止对已存在的表执行任何DDL操作 -- 发给DBA 发布的DDL,附带常用SQL: SELECT, INSERT ,DELETE, UPDATE,便于DBA给出create index CONCURRENTLY等其他优化建议 +- 所有生产DDL操作必须经过开发测试环境验证 -- 向大size的table中add column时,将 alter table t add column col datatype not null default xxx;分解为如下,避免填充default值导致的过长时间锁表 +- 维护索引时应采用concurrently的方式 -```sql -alter table t add column col datatype ; +- 应该使用pg_repack替换vacuum full来重建表 -alter table t alter column col set default xxx; +- 大表添加带默认值的字段时,应拆分为:添加字段、填补默认值及添加非空约束三部分,如将`alter table t add column col datatype not null default xxx;`分解为如下,避免填充`default`值导致的过长时间锁表 -update table t set column= DEFAULT where id in ( select id from t where column is null limit + ```sql + alter table t add column col datatype ; + alter table t alter column col set default xxx; + update table t set column= DEFAULT where id in ( select id from t where column is null limit + 1000 ) ; \watch 3 + alter table t alter column col set not null; + ``` -1000 ) ; \watch 3 +### DML操作 -alter table t 
alter column col set not null; -``` +- 更新数据的SQL语句禁止出现`where 1=1` -### DML操作 +- 单条DML语句操作的数据量不超过10万 + +- 清空表中的数据时,应使用`truncate` + +- 对于风险性较高的操作,应该显示的开启事务,确认无误后在提交 + +- 事务中SQL逻辑尽量简单,操作执行完后要及时提交,避免`idle in transaction`状态 -- update 时做 <> 判断,比如update table_a set column_b = c where column_b <> c,禁止出现where 1=1 +- 大批数据入库时应使用`copy`替换`insert` -- 单条DML语句操作数据量不超过10万 +- 数据导入前考虑先删除索引,导入完成后重建 -- 建议清空表时,使用truncate,不建议使用delete ### DQL操作 -- 非必须时禁止使用select *,只取所需字段,以减少包括不限于网络带宽消耗,避免表结构变更对程序的影响(比如某些prepare query) +- 禁止使用`select *`,应用具体所需字段替换 -- 对报表类的或生成基础数据的查询,使用物化视图(MATERIALIZED VIEW)定期固化数据快照,避免对多表(尤其多写频繁的表)重复跑相同的查询 +- 禁止使用`where 1=1`,避免全表扫描或笛卡尔积 -- 复杂的统计查询可以尝试窗口函数 Window Functions +- 检索条件值应该与字段类型保持一致,防止不走索引 -- 避免关联字段数据类型不一致,禁止使用隐式类型转换 +- 等号左边的字段应该与索引保持一致,尤其是条件索引或函数索引 + +- 关注慢SQL的执行计划,如与预期不一致,尽快修改 + +- 使用`count(*)`或`count(1)`来统计行数,`count(column)`不会统计`null`行 + +- 限制`join`的数量,不建议超过3个 + +- 递归查询需要做好限制,防止无限循环 + +- 对于or运算,应该使用`union all`或`union`替换 -- 不同字段的or语句使用union代替 ### 数据导入 -- 建议大批量的数据入库时,使用copy,不建议使用insert,以提高写入速度 +- 建议大批量的数据入库时,使用`copy`,不建议使用`insert`,以提高写入速度 - 导入数据前需要先删除相关索引,导入完成后重建,提高数据导入速度 @@ -643,13 +618,13 @@ alter table t alter column col set not null; - 执行CRAETE、DROP、ALTER等DDL操作,尤其多条,不要显式的开transaction,因为加lock的mode非常高,极易产生deadlock -- state 为 idle in transaction 的连接,如果出现在Master,会无谓的lock住相应的资源,可导致后续产生lock,甚至deadlock; 出现在Slave,可导致卡住主从同步 +- state为`idle in transaction`的连接,如果出现在Master,会无谓的lock住相应的资源,可导致后续产生lock,甚至deadlock;出现在Slave,可导致卡住主从同步 ### 其他 -- 建议运行在SSD上的实例,random_page_cost (默认值为4) 设置为1.0~2.0之间,使查询规划器更倾向于使用索引扫描 +- 建议运行在SSD上的实例,random_page_cost(默认值为4)设置为1.0~2.0之间,使查询规划器更倾向于使用索引扫描 -- 建议在需要使用explain analyze查看实际真正执行计划与时间时,如果是写入query,强烈建议先开启事务,然后回滚。 +- 建议在需要使用`explain analyze`查看实际真正执行计划与时间时,如果是写入query,强烈建议先开启事务,然后回滚。 - 对于频繁更新,膨胀率较高的表,应找窗口期执行表重组,降低高水位 @@ -657,83 +632,85 @@ alter table t alter column col set not null; ### 建库规范 -MogDB/openGauss创建数据库使用PG兼容模式如下: +MogDB创建数据库使用PG兼容模式方法如下: +```sql create database dbnam DBCOMPATIBILITY='PG' encoding=’utf8’; +``` ### 数据类型 #### 数值类型 
-在开发使用中,MogDB/openGauss只允许使用smallint、integer、bigint、numeric[(p[,s])]、serial、bigserial 这几种类型。 - -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | -| ---------------- | ---------- | --------- | ------------ | ------------------------------------------------------- | -| tinyint | / | 支持 | 1字节 | 0 ~ 255 | -| smallint | 支持 | 支持 | 2字节 | -32,768 ~ +32,767 | -| integer | 支持 | 支持 | 4字节 | -2,147,483,648 ~ +2,147,483,647 | -| binary_integer | / | 支持 | / | integer别名 | -| bigint | 支持 | 支持 | 8字节 | -9,223,372,036,854,775,808 ~ +9,223,372,036,854,775,807 | -| decimal[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | -| numeric[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | -| number[(p[,s])] | / | 支持 | / | numeric别名 | -| real | 支持 | 支持 | 4字节 | 6位十进制数字精度 | -| float4 | / | 支持 | 4字节 | 6位十进制数字精度 | -| double precision | 支持 | 支持 | 8字节 | 15位十进制数字精度 | -| binary_double | / | 支持 | 8字节 | double precision别名 | -| float8 | / | 支持 | 8字节 | 15位十进制数字精度 | -| float[(p )] | / | 支持 | 4字节或8字节 | | -| dec[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | -| integer[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | -| smallserial | 支持 | 支持 | 2字节 | 1 ~ 32,767 | -| serial | 支持 | 支持 | 4字节 | 1 ~ 2,147,483,647 | -| bigserial | 支持 | 支持 | 8字节 | 1 ~ 9,223,372,036,854,775,807 | -| tinyint | / | 支持 | 1字节 | 0 ~ 255 | +在开发使用中,MogDB只允许使用smallint、integer、bigint、numeric[(p[,s])]、serial、bigserial这几种类型。 + +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | +| ---------------- | ---------- | ----- | ------------ | ------------------------------------------------------- | +| tinyint | / | 支持 | 1字节 | 0 ~ 255 | +| smallint | 支持 | 支持 | 2字节 | -32,768 ~ +32,767 | +| integer | 支持 | 支持 | 4字节 | -2,147,483,648 ~ +2,147,483,647 | +| binary_integer | / | 支持 | / | integer别名 | +| bigint | 支持 | 支持 | 8字节 | -9,223,372,036,854,775,808 ~ +9,223,372,036,854,775,807 | +| decimal[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | +| numeric[(p[,s])] | 支持 | 支持 | 可变字节 | 最高小数点前131072位,以及小数点后16383位 | +| number[(p[,s])] | / | 支持 | / | 
numeric别名 | +| real | 支持 | 支持 | 4字节 | 6位十进制数字精度 | +| float4 | / | 支持 | 4字节 | 6位十进制数字精度 | +| double precision | 支持 | 支持 | 8字节 | 15位十进制数字精度 | +| binary_double | / | 支持 | 8字节 | double precision别名 | +| float8 | / | 支持 | 8字节 | 15位十进制数字精度 | +| float[(p )] | / | 支持 | 4字节或8字节 | | +| dec[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | +| integer[(p,[s])] | / | 支持 | / | 最高小数点前131072位,以及小数点后16383位 | +| smallserial | 支持 | 支持 | 2字节 | 1 ~ 32,767 | +| serial | 支持 | 支持 | 4字节 | 1 ~ 2,147,483,647 | +| bigserial | 支持 | 支持 | 8字节 | 1 ~ 9,223,372,036,854,775,807 | +| tinyint | / | 支持 | 1字节 | 0 ~ 255 | #### 字符类型 -在开发使用中,MogDB/openGauss只允许使用char(n)、varchar(n)、text字符类型。 +在开发使用中,MogDB只允许使用char(n)、varchar(n)、text字符类型。 -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | -| ------------ | ---------- | --------- | ----------------------------- | ------------------------------------------------------------ | -| char(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | -| nchar(n) | / | 支持 | 最大为10MB | n指字节数量,兼容pg模式配置下n代表字符数量 | -| varchar(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | -| varchar2(n) | / | 支持 | 最大为10MB | varchar(n)别名 | -| nvarchar2(n) | / | 支持 | 最大为10MB | n指字符数量 | -| text | 支持 | 支持 | 1GB - 1 | | -| clob | / | 支持 | 1GB - 1 | text别名 | +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | +| ------------ | ---------- | ----- | ----------------------------- | ------------------------------------------------------------ | +| char(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | +| nchar(n) | / | 支持 | 最大为10MB | n指字节数量,兼容pg模式配置下n代表字符数量 | +| varchar(n) | 支持 | 支持 | pg中最大为1GB,og中最大为10MB | pg中n指字符数量,og中n指字节数量,兼容pg模式配置下n代表字符数量 | +| varchar2(n) | / | 支持 | 最大为10MB | varchar(n)别名 | +| nvarchar2(n) | / | 支持 | 最大为10MB | n指字符数量 | +| text | 支持 | 支持 | 1GB - 1 | | +| clob | / | 支持 | 1GB - 1 | text别名 | #### 时间类型 -在开发使用中,MogDB/openGauss只允许使用timestamp[(p )][with time zone]、date日期类型。 +在开发使用中,MogDB只允许使用`timestamp[(p)][with time 
zone]`、`date`期类型。 -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | | ----------------------------------- | ---------- | --------- | -------- | ------------------------------------------------------- | | timestamp[(p )][without time zone] | 支持 | 支持 | 8字节 | 4713 BC - 294276 AD | -| timestamp[(p )][with time zone] | 支持 | 支持 | 8字节 | 4713 BC - 294276 AD | +| timestamp[(p )][with time zone] | 支持 | 支持 | 8字节 | 4713 BC - 294276 AD | | date | 支持 | 支持 | 4字节 | 4713 BC - 5874897 AD (og实际存储空间大小为8字节) | -| time[(p )][without time zone] | 支持 | 支持 | 8字节 | 00:00:00 - 24:00:00 | -| time[(p )][with time zone] | 支持 | 支持 | 12字节 | 00:00:00+1459 - 24:00:00-1459 | -| interval[fields][(p )] | 支持 | 支持 | 16字节 | -178000000年 - 178000000年 | +| time[(p )][without time zone] | 支持 | 支持 | 8字节 | 00:00:00 - 24:00:00 | +| time[(p )][with time zone] | 支持 | 支持 | 12字节 | 00:00:00+1459 - 24:00:00-1459 | +| interval[fields][(p )] | 支持 | 支持 | 16字节 | -178000000年 - 178000000年 | | smalldatetime | / | 支持 | 8字节 | 日期和时间,不带时区,精确到分钟,秒位大于等于30秒进一位 | -| interval day(1) to second(p ) | / | 支持 | 16字节 | | +| interval day(1) to second(p) | / | 支持 | 16字节 | | | reltime | / | 支持 | 4字节 | | #### json类型 -MogDB/openGauss只允许使用json类型。 +MogDB只允许使用json类型。 -| 类型 | PostgreSQL | openGauss | 存储尺寸 | 备注 | -| ----- | ---------- | --------- | -------- | ---- | -| json | 支持 | 支持 | / | | -| jsonb | 支持 | / | / | | +| 类型 | PostgreSQL | MogDB | 存储尺寸 | 备注 | +| ----- | ---------- | ----------- | -------- | ---- | +| json | 支持 | 支持 | / | | +| jsonb | 支持 | 2.1开始支持 | / | | ### 关键字 -PostgreSQL与openGauss数据库中关键字差异化说明,保留是指数据库保留关键字,不允许自定义使用;非保留或空是指可以自定义使用,MogDB/openGauss保留字详情,请参考附件。 +PostgreSQL与MogDB数据库中关键字差异化说明,保留是指数据库保留关键字,不允许自定义使用;非保留或空是指可以自定义使用,MogDB保留字详情,请参考[此页面](2-keywords)。 -| 关键字 | MogDB/openGauss | PostgreSQL | +| 关键字 | MogDB | PostgreSQL | | ------------- | ------------------------ | ------------------------ | | AUTHID | 保留 | #N/A | | BUCKETS | 保留 | #N/A | @@ -760,18 +737,19 @@ 
PostgreSQL与openGauss数据库中关键字差异化说明,保留是指数据 | SYSDATE | 保留 | #N/A | | VERIFY | 保留 | #N/A | -### 隐式转换对应表 - -| input_type | target_type | MogDB/openGauss | -| ----------- | ---------------------------------------------------------- | --------------- | -| bool | int2、int4、int8 | 支持 | -| int2 | bool、text、varchar、interval | 支持 | -| int4 | bool、int2、text、varchar、interval | 支持 | -| int8 | bool、text、varchar | 支持 | -| text | int8、int4、int2、float4、float8、date、timestamp、nemeric | 支持 | -| float4 | int8、int4、int2、text、varchar | 支持 | -| float8 | int8、int4、int2、text、float4、varchar、interval、numeric | 支持 | -| date | text、varchar | 支持 | -| timestamp | text、varchar | 支持 | -| timestamptz | text | 支持 | -| numeric | int8、int4、int2、text、varchar、interval | 支持 | +### 隐式转换异同 + +| input_type | target_type | MogDB | PG | +| ----------- | ------------------------------------------------------------ | ---------------- | -------------------------------------------------------- | +| bool | int2、int4、int8 | 支持 | int4不支持,其它无 | +| int2 | bool、text、bpchar、varchar、interval | 支持(bpchar无) | 无 | +| int4 | bool、int2、text、bpchar、varchar、interval | 支持(bpchar无) | bool不支持,int2为in assignment,其它无 | +| int8 | bool、text、bpchar、varchar | 支持(bpchar无) | 无 | +| text | int8、int4、int2、float4、float8、date、timestamp、nemeric | 支持 | 无 | +| float4 | int8、int4、int2、text、bpchar、varchar | 支持(bpchar无) | 前三in assignment,后无 | +| float8 | int8、int4、int2、text、float4、bpchar、varchar、interval、numeric | 支持(bpchar无) | int8、int4、int2、float4、numeric为in assignment,其它无 | +| bpchar | int8、int4、date、timestamp、numeric | | | +| date | text、bpchar、varchar | 支持(bpchar无) | 无 | +| timestamp | text、varchar | 支持 | 无 | +| timestamptz | text | 支持 | 无 | +| numeric | int8、int4、int2、text、bpchar、varchar、interval | 支持(bpchar无) | 前三in assignment,后无 | -- Gitee From a63380d9a304ba9765e2bcc52281addef466fdb5 Mon Sep 17 00:00:00 2001 From: spaceoddity91719 Date: Wed, 20 Apr 2022 11:30:57 +0800 Subject: [PATCH 5/6] fix markdownlint --- 
.../developer-guide/dev/1-development-specifications.md | 2 +- .../developer-guide/dev/1-development-specifications.md | 2 +- .../developer-guide/dev/1-development-specifications.md | 6 ------ .../developer-guide/dev/1-development-specifications.md | 6 ------ 4 files changed, 2 insertions(+), 14 deletions(-) diff --git a/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md b/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md index 9217c892..abafcf71 100644 --- a/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md +++ b/product/en/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md @@ -403,7 +403,7 @@ Apart from the primary key, unique constraint is needed. You can create a unique #### Foreign Key Constraint - You'd better create foreign key constraints for a table with foreign key relationship. -- The use of foreign keys is not recommended for systems with high performance requirements and security under your control. +- The use of foreign keys is not recommended for systems with high performance requirements and security under your control. - When using the foreign key, you must set the action of the foreign key, such as cascade, set null, or set default. #### Non-Empty Column diff --git a/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md b/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md index 9217c892..abafcf71 100644 --- a/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md +++ b/product/en/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md @@ -403,7 +403,7 @@ Apart from the primary key, unique constraint is needed. You can create a unique #### Foreign Key Constraint - You'd better create foreign key constraints for a table with foreign key relationship. 
-- The use of foreign keys is not recommended for systems with high performance requirements and security under your control. +- The use of foreign keys is not recommended for systems with high performance requirements and security under your control. - When using the foreign key, you must set the action of the foreign key, such as cascade, set null, or set default. #### Non-Empty Column diff --git a/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md b/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md index 2c2b12ec..4fd75238 100644 --- a/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md +++ b/product/zh/docs-mogdb/v2.1/developer-guide/dev/1-development-specifications.md @@ -54,7 +54,6 @@ date: 2021-04-27 - table能包含的column数目,根据字段类型的不同,数目在250到1600之间 - ### 临时及备份对象命名 - 临时或备份的数据库对象名,如table,建议添加日期,如`dba.trade_record_1970_01_01`(其中dba 为DBA专用schema,trade_record为表名,1970_01_01为备份日期)。 @@ -158,7 +157,6 @@ SELECT 1 > 说明:比如A表user_id字段数据类型定义为varchar,但是SQL语句查询为`where user_id=1234;` - ### partition table设计 - 分区表的个数不建议超过1000个。 @@ -372,7 +370,6 @@ DROP TABLESPACE - MogDB新建数据库默认兼容oracle,not null约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 - ### 序列设计 - 禁止手动创建与表相关的序列,应指定serial/bingserial类型方式创建。 @@ -387,7 +384,6 @@ DROP TABLESPACE - 序列应与代码中变量定义类型及范围一致,防止无法插入数据。 - ### constraint设计 #### 主键约束 @@ -584,7 +580,6 @@ DROP TABLESPACE - 数据导入前考虑先删除索引,导入完成后重建 - ### DQL操作 - 禁止使用`select *`,应用具体所需字段替换 @@ -605,7 +600,6 @@ DROP TABLESPACE - 对于or运算,应该使用`union all`或`union`替换 - ### 数据导入 - 建议大批量的数据入库时,使用`copy`,不建议使用`insert`,以提高写入速度 diff --git a/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md b/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md index 2c2b12ec..4fd75238 100644 --- a/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md +++ b/product/zh/docs-mogdb/v3.0/developer-guide/dev/1-development-specifications.md @@ -54,7 +54,6 @@ 
date: 2021-04-27 - table能包含的column数目,根据字段类型的不同,数目在250到1600之间 - ### 临时及备份对象命名 - 临时或备份的数据库对象名,如table,建议添加日期,如`dba.trade_record_1970_01_01`(其中dba 为DBA专用schema,trade_record为表名,1970_01_01为备份日期)。 @@ -158,7 +157,6 @@ SELECT 1 > 说明:比如A表user_id字段数据类型定义为varchar,但是SQL语句查询为`where user_id=1234;` - ### partition table设计 - 分区表的个数不建议超过1000个。 @@ -372,7 +370,6 @@ DROP TABLESPACE - MogDB新建数据库默认兼容oracle,not null约束不允许传入空字符串,空字符串默认会转换为null,兼容PG模式的数据库不会有这个问题。 - ### 序列设计 - 禁止手动创建与表相关的序列,应指定serial/bingserial类型方式创建。 @@ -387,7 +384,6 @@ DROP TABLESPACE - 序列应与代码中变量定义类型及范围一致,防止无法插入数据。 - ### constraint设计 #### 主键约束 @@ -584,7 +580,6 @@ DROP TABLESPACE - 数据导入前考虑先删除索引,导入完成后重建 - ### DQL操作 - 禁止使用`select *`,应用具体所需字段替换 @@ -605,7 +600,6 @@ DROP TABLESPACE - 对于or运算,应该使用`union all`或`union`替换 - ### 数据导入 - 建议大批量的数据入库时,使用`copy`,不建议使用`insert`,以提高写入速度 -- Gitee From 71460aa7178c0791f94f6dfc9bda8d24810c7bcd Mon Sep 17 00:00:00 2001 From: spaceoddity91719 Date: Wed, 20 Apr 2022 14:58:36 +0800 Subject: [PATCH 6/6] sca 5.1 --- product/en/docs-sca/v5.1/command_options.md | 319 ++++++++++++++++++++ product/en/docs-sca/v5.1/db2_to_mogdb.md | 72 +++++ product/en/docs-sca/v5.1/mysql_to_mogdb.md | 88 ++++++ product/en/docs-sca/v5.1/oracle_to_mogdb.md | 105 +++++++ product/en/docs-sca/v5.1/overview.md | 58 ++++ product/en/docs-sca/v5.1/pg_to_mogdb.md | 74 +++++ product/en/docs-sca/v5.1/release-notes.md | 87 ++++++ product/en/docs-sca/v5.1/result.md | 182 +++++++++++ product/en/docs-sca/v5.1/toc.md | 16 + product/en/docs-sca/v5.1/usage.md | 70 +++++ product/zh/docs-sca/v5.1/command_options.md | 319 ++++++++++++++++++++ product/zh/docs-sca/v5.1/db2_to_mogdb.md | 72 +++++ product/zh/docs-sca/v5.1/mysql_to_mogdb.md | 86 ++++++ product/zh/docs-sca/v5.1/oracle_to_mogdb.md | 105 +++++++ product/zh/docs-sca/v5.1/overview.md | 59 ++++ product/zh/docs-sca/v5.1/pg_to_mogdb.md | 73 +++++ product/zh/docs-sca/v5.1/release-notes.md | 87 ++++++ product/zh/docs-sca/v5.1/result.md | 173 +++++++++++ product/zh/docs-sca/v5.1/toc.md | 16 + 
product/zh/docs-sca/v5.1/usage.md | 70 +++++ src/utils/config.js | 59 ++-- 21 files changed, 2163 insertions(+), 27 deletions(-) create mode 100644 product/en/docs-sca/v5.1/command_options.md create mode 100644 product/en/docs-sca/v5.1/db2_to_mogdb.md create mode 100644 product/en/docs-sca/v5.1/mysql_to_mogdb.md create mode 100644 product/en/docs-sca/v5.1/oracle_to_mogdb.md create mode 100644 product/en/docs-sca/v5.1/overview.md create mode 100644 product/en/docs-sca/v5.1/pg_to_mogdb.md create mode 100644 product/en/docs-sca/v5.1/release-notes.md create mode 100644 product/en/docs-sca/v5.1/result.md create mode 100644 product/en/docs-sca/v5.1/toc.md create mode 100644 product/en/docs-sca/v5.1/usage.md create mode 100644 product/zh/docs-sca/v5.1/command_options.md create mode 100644 product/zh/docs-sca/v5.1/db2_to_mogdb.md create mode 100644 product/zh/docs-sca/v5.1/mysql_to_mogdb.md create mode 100644 product/zh/docs-sca/v5.1/oracle_to_mogdb.md create mode 100644 product/zh/docs-sca/v5.1/overview.md create mode 100644 product/zh/docs-sca/v5.1/pg_to_mogdb.md create mode 100644 product/zh/docs-sca/v5.1/release-notes.md create mode 100644 product/zh/docs-sca/v5.1/result.md create mode 100644 product/zh/docs-sca/v5.1/toc.md create mode 100644 product/zh/docs-sca/v5.1/usage.md diff --git a/product/en/docs-sca/v5.1/command_options.md b/product/en/docs-sca/v5.1/command_options.md new file mode 100644 index 00000000..9747528e --- /dev/null +++ b/product/en/docs-sca/v5.1/command_options.md @@ -0,0 +1,319 @@ +--- +title: SCA Command Options +summary: SCA Command Options +author: hongyedba +date: 2021-09-30 +--- + +# SCA Command Options + +SCA is a command line tool. All functions can be realized through single executable program. + +## General Options + +General options may be used in all types of tasks. + +### --help (Help Information) + +Before using the analysis program, you are advised to read help information carefully. 
(Command: `./sca_linux_x86_64 --help`): + +```shell +Introduction: + SCA is a tool used to do SQL life-cycle inspection and simulation + when migration from heterogeneous database to MogDB. + Source databases supported as below: + 1. Oracle : SQL Inspection, Simulation + 2. DB2 : SQL Inspection + 3. MySQL : SQL Inspection + 4. PostgreSQL : SQL Inspection + +Options: + --[ Overall ]-- + --help : Show help message + -v, --version : Show SCA version + -T, --type : Run type: + : I = Init SCA repository + : L = Apply for license + : ----------------[Analysis: Target required]--------------- + : OI = Oracle Inspection, this is default type + : MI = MySQL Inspection + : DI = DB2 Inspection + : PI = PostgreSQL Inspection + : OS = Oracle Simulation, just like Oracle SPA + : OIS = Oracle Inspection & Simulation + : ------------[Collection: No need of target DB]------------ + : OC = Oracle Collection + : MC = MySQL Collection + : DC = DB2 Collection + : PC = PostgreSQL Collection + -d, --data : Unzipped data directory for analyzer, or directory for collection + -D, --data-id : Use data with data id existed in the repository + -w, --workers : Parallel workers for tasks, default: 10 + -x, --debug : Enable debug mode + -l, --logfile : Output to both logfile (without progress-bar) and screen (with progress-bar) + -L, --log-only : Output to only logfile (without progress-bar) + -F, --force : Force mode in REPO Creation, drop old objects before create it + -r, --report : Final report file location, default in data directory with name 'report' + --license : License file, default is [./license.json] + --sql-transformer : Regular rules for SQL transformation (for internal use) + : Format: [{"name": "xxx" + : "source": "xxx", + : "target": "xxx", + : "comment": "xxx"}, ...] 
+ + --[ Repository Connection ]-- + -H, --repo-host : Repository DB Server host address, default: 127.0.0.1 + -P, --repo-port : Repository DB server port, default: 5432 + -N, --repo-name : Repository database, default: sca_db + -U, --repo-user : Repository user, default: sca_repo + -E, --repo-password : Repository password, default: SCA@password + --user : Administrator used to create repository DB and user, default: mogdb + --password : Password for Administrator, default: mogdb + + --[ Source & Target Connection ]-- + -h, --db-host : Source & Target DB Server host address, default same as -H + -p, --db-port : Source & Target DB server port, default same as -P + -n, --db-name : Source & Target database, default same as -N + -u, --db-user : Source & Target user, default same as -U + -e, --db-password : Source & Target password, default same as -E + --target-type : Target database type in analysis tasks, default: MOGDB + : Valid type: ORACLE, MOGDB, OPENGAUSS, POSTGRESQL, MYSQL, DB2 + + --[ Collection Options ]-- + -q, --sql-days : How many days for session sql data, default: 7 + -Q, --sql-interval : SQL collect interval in seconds, default: 600 + -s, --schema-include : Users/Schemas included in data collection, default: '' + -S, --schema-exclude : Users/Schemas excluded in data collection + : Default: <> + -m, --enable-monitor : Starting background monitor process in SQL Collection + : Valid values: 1/on/true/t = ENABLE, default: on + : 0/off/false/f = DISABLE + --slow-log : MySQL slow-log for client data collection + --sql-csv : SQL file in csv format for SQL Inspection (@todo) + +Usage: + 0. Apply for license + ./sca_macos_x86_64 -T L + 1. Init repository (used for first running) + ./sca_macos_x86_64 -T i -H -P -N -U -E --user --password + 2. 
Oracle data collection + # Notice: "-q 0.001 -Q 60" means gather Session SQL only once + # "-m off" means do not monitor system status (CPU Idle and Disk Free) + ./sca_macos_x86_64 -T OC -s SCOTT -h -p -n '' -u -e -q 0.001 -Q 60 -m off + ./sca_macos_x86_64 -T OC -s SCOTT -h -p -n '' -u -e + 3. MySQL data collection using slow-log file + ./sca_macos_x86_64 -T MC -d --slow-log= + 4. Oracle SQL compatible analysis (Required: Repository, Target DB) + Note: use [H/P/N/U/E] options to assign the repository + use [h/p/n/u/e] options to assign the target database + ./sca_macos_x86_64 -T OI -d -n + 5. Oracle SQL performance simulation (Required: Repository, Target DB) + Note: use [H/P/N/U/E] options to assign the repository + use [h/p/n/u/e] options to assign the target database + ./sca_macos_x86_64 -T OS -d -n + 6. MySQL SQL compatible analysis (Required: Repository, Target DB) + Not e: use [H/P/N/U/E] options to assign the repository + use [h/p/n/u/e] options to assign the target database + ./sca_macos_x86_64 -T MI -d -h -p -n +``` + +### -v, --version (Version) + +View the current version of SCA: + +```shell +hongyedba@localhost ~ % ./sca_linux_x86_64 -v +SCA version: 5.1.0 +``` + +### -T, --type (Task Type) + +Default value: OI + +Specifies the task type. Currently, SCA supports the following types of tasks: + +1. `I [Initialize]`: **Initialize**, used to initialize the SCA repository +2. `L [Apply License]`: **Apply license**, task for license application +3. `OC [Oracle Collection]`: **Oracle collection**, used to collect the business SQL executed in Oracle database, need long time to collect +4. `MC [MySQL Collection]`: **MySQL collection**, used to collect the business SQL executed in MySQL database, need to configure the slow log in advance, and then collect at once +5. `DC [DB2 Collection]`: **DB2 collection**, used to collect the business SQL executed in DB2 database, need long time to collect +6. 
`OI [Oracle Inspection]`: **Oracle compatibility evaluation**, used to evaluate the actual compatibility of business SQL collected in Oracle on the source side in MogDB on the target side +7. `MI [MySQL Inspection]`: **MySQL compatibility evaluation**, used to evaluate the actual compatibility of business SQL collected in MySQL on the source side in MogDB on the target side +8. `DI [DB2 Inspection]`: **DB2 compatibility evaluation**, used to evaluate the actual compatibility of business SQL collected in DB2 on the source side in MogDB on the target side +9. `OS [Oracle Simulation]`: **Oracle performance evaluation**, used to evaluate the execution performance of business SQL collected in Oracle on the source side in MogDB on the target side +10. `OIS [Oracle Inspection & Simulation]`: **Oracle compatibility and performance evaluation**, equivalent to OI + OS two tasks at the same time + +### -d, --data (Data Directory) + +The -d data directory can be specified for all task types except repository initialization (-T I). + +The collection task writes the collected data to the data directory specified by -d. + +The analysis task reads data from the data directory, inserts the data into the repository table, and the final generated report results are also written to the data directory by default. + +### -D, --data-id (Data ID) + +Specify the data ID and then read the data with the specified ID directly from the repository instead of re-reading and loading the data from the data directory. + +Specifying the -D option skips the step of loading data and performs the relevant analysis task directly. + +## -w, --workers (Degree of Parallelism) + +The default degree of parallelism is 10. + +Specifies the degree of parallelism for running tasks. Appropriate degree of parallelism can improve the running speed of each task. 
+ +Applicable to: file data loaded to a data repository, SQL compatibility assessment, SQL complexity assessment, SQL performance simulation, and other operations + +**Note**: + +* In SQL simulation tasks, a greater degree of parallelism may lead to degradation of the execution efficiency of single SQL. Therefore, an appropriate degree of parallelism needs to be chosen according to the loading of the actual production environment and is generally set to the average number of active sessions of a production database. + +### -x, --debug (Debug Mode) + +Enabling the debug mode does not affect normal analysis logic but will output a large number of logs. The debug mode is usually used for assisting the analysis program in running exceptions itself. + +### -l, --logfile (Log File) + +Specifies the log file of a program. The program will output data to both the terminal command line and log file. + +### -L, --log-only (Log File) + +Specifies the log file of a program. The program will output data only to the log file. + +### -F, --force (Forcible Mode) + +Specifies whether to enable the forcible mode. This mode takes effect in the data repository initialization scenarios. + +During the initialization of a data repository, if the forcible mode is enabled, the data repository will be deleted first and then re-created. + +### -r, --report (Report Directory) + +Default value: /report + +Specifies the report directory. The report here includes the compatibility evaluation and SQL simulation reports in the HTML format, which can be viewed offline. + +Also, for the `compatibility evaluation` task, a `sql_detail_list.csv` file is generated to record the evaluation results of all SQL and possible rewriting options. + +### --license (License File) + +Default value: ./license.json + +The location of the License file. 
+ +### --sql-transformer (SQL transform rules) + +[Advanced Usage] Specify the SQL transform rules from the source database to the target database, and use regular matching for transform. + +## Repository Options + +### -H, --repo-host (Repository IP Addresses) + +Default value: 127.0.0.1 + +Repository database IP Addresses. + +### -P, --repo-port (Repository Port) + +Default value: 5432 + +Repository database port. + +### -N, --repo-name (Repository Name) + +Default value: sca_db + +Repository database name. + +### -U, --repo-user (Repository User) + +Default value: sca_repo + +Repository database login user. + +### -E, --repo-password (Repository Password) + +Default value: SCA@password + +Repository database login password. + +### --user (Administrator User) + +Repository database administrator user, used to create repository user and repository database during the initialization of the repository. + +### --password (Administrator Password) + +Repository administrator user login password, used to create repository user and repository database during the initialization of the repository. + +## Source or Target Database Connection + +### -h, --db-host (Source or Target Database IP Addresses) + +Source or target database IP Addresses, inherits the -H option value by default. + +### -p, --db-port (Source or Target Database Port) + +Source or target database port, inherits the -P option value by default. + +### -n, --db-name (Source or Target Database Name) + +Source or target database Name, inherits the -N option value by default. + +In the SQL Analysis task, specify the target database name, which is usually the target database created on the MogDB side when using MTK or other data migration tools for database structure and data migration. + +Note that the repository user needs to have full operational privileges to the target database by default, and the default repository is administrator privileges. 
+ +* In the compatibility assessment, only the target database needs to have an object structure +* In SQL simulation, the target database needs to have both object structure and real peer-to-peer full production data, otherwise the performance simulation results are not informative + +### -u, --db-user (Source or Target Database User) + +Source or target database login user, inherits the -U option value by default. + +### -e, --db-password (Source or Target Database Password) + +Source or target database login password, inherits the -E option value by default. + +### --target-type (Target Database Type) + +Specify the target database type in the analysis task, the default is MOGDB. + +Currently this parameter is not perfect, only MOGDB/POSTGRESQL is supported. + +## Data Collection Options + +## -q, --sql-days (Number of Days for SQL Collection) + +Specifies the total number of days for SQL-related data collection. Data miss may occur in collecting the executed SQL data from session cache GV\$SQLAREA. You can prolong the number of collection days to reduce the data collection miss possibility. + +By default, SQL data of a week will be collected. + +### -Q, --sql-interval (SQL Collection Interval) + +Specifies the SQL-related data collection interval. By default, the SQL data is collected every 10 minutes. + +The SQL data collected every time will be compared with the collected data to filter out the repeated data. This can prevent the data file from being too large. + +### -s, schema-include (Schema Whitelist) + +Specifies the schema whitelist for data collection. Only schema-related data listed in the whitelist is collected. + +### -S, schema-exclude (Schema Blacklist) + +Specifies the schema blacklist for data collection. Schema-related data listed in the blacklist is not collected. + +By default, Oracle system users are listed in the schema blacklist. 
+ +### -m, enable-monitor (Resource Monitoring) + +Specifies whether to enable the resource monitoring process in the background. This process is enabled by default. + +The resource monitoring process queries the CPU usage of the current server every 3s and the remaining space of the file system where the data directory is located. + +When the CPU usage is greater than 90% or the remaining space of the file system is lower than 100 MB, the monitoring sub-process will stop collecting data from the primary process so that the server will not become faulty due to resource problems. + +### --slow-log (Slow Query Log) + +Specifies the MySQL slow query log file. + +When the collection program cannot access the target MySQL database and the collection program is not running on the MySQL server, you can manually retrieve the MySQL slow log from the target database and use the current option to specify the slow log to parse and generate the corresponding collection data, which can be used for subsequent MySQL SQL compatibility evaluation tasks. \ No newline at end of file diff --git a/product/en/docs-sca/v5.1/db2_to_mogdb.md b/product/en/docs-sca/v5.1/db2_to_mogdb.md new file mode 100644 index 00000000..eaa2f792 --- /dev/null +++ b/product/en/docs-sca/v5.1/db2_to_mogdb.md @@ -0,0 +1,72 @@ +--- +title: DB2 to MogDB evaluation +summary: DB2 to MogDB evaluation +author: Hong Rihua +date: 2022-03-07 +--- + +# DB2 to MogDB Evaluation + +## MogDB Environment Preparation + +You need to prepare your own MogDB/openGauss database environment for the repository and the target database, and the same environment is used by default for the repository and the target database. + +## Object Structure Migration + +- **Structure Migration**: You need to create the target database in the prepared MogDB/openGauss environment and migrate the data structures from the source database to the newly created target database. 
+- **Data Migration**: If you want to perform SQL performance comparison evaluation, then in addition to migrating the database structure, you also need to migrate the table data to ensure that the source DB2 and the target MogDB/openGauss database data size is the same for the performance comparison to be meaningful. + +Related Tool: + +- **MTK**: Structure and data migration can be done by MTK, for details please refer to [https://mogdb.io/mtk](https://mogdb.io/mtk) + +## DB2 Data Collection + +Currently DB2's data collection is a continuous collection of SQL from the cache. By default, the collection process is performed periodically (every 10 minutes) for a total of one week's worth of cached SQL data. + +### Required Permissions + +The permissions required for the collection user are as follows. + +> dbadm + +### Related Commands + +```shell +./sca_linux_x86_64 -T DC -h -p -n -u -e + +# Command options: +# -h/p/t/u/e specify the connection method of the source DB2 database +# Considering that the SQL content in the shared cache may not be comprehensive, the collection will be continuous and incremental, with one week of data collected by default. If you are only doing functional validation, you can use the -q 0.001 -Q 60 option, i.e. only collect SQL data once. +``` + +### Collection Results + +When the collection is complete, a zip packet is generated which can be copied to the target database and unpacked into a data directory. + +Subsequent compatibility analysis relies on the data in this packet and does not require another connection to the source DB2 database. 
+ +## Repository Initialization + +```shell +# Use the MogDB/openGauss user with administrator privileges for repository initialization +# Repository initialization will create a database with the name sca_db by default +# You can specify the repository name by -N, the repository user name by -U, and the repository user password by -E + +./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password +``` + +## Perform Analysis Tasks + +### Compatibility Analysis + +DB2 database currently only supports SQL compatibility analysis with the following command. + +```shell +# If the repository name, user name, password are not the default, you need to use the -N, -U, -E options to specify +# If the target database information is different from the repository, use the -h, -p, -n, -u, -e options to specify + +./sca_linux_x86_64 -T DI -H -P -n -d +``` + +When the analysis is complete, a report will be generated in the directory specified by **-d**, which can be downloaded offline for viewing. diff --git a/product/en/docs-sca/v5.1/mysql_to_mogdb.md b/product/en/docs-sca/v5.1/mysql_to_mogdb.md new file mode 100644 index 00000000..59a0eaaf --- /dev/null +++ b/product/en/docs-sca/v5.1/mysql_to_mogdb.md @@ -0,0 +1,88 @@ +--- +title: MySQL to MogDB Evaluation +summary: MySQL to MogDB Evaluation +author: Hong Rihua +date: 2022-03-07 +--- + +# MySQL to MogDB Evaluation + +## MogDB Environment Preparation + +You need to prepare your own MogDB/openGauss database environment for the repository and the target database, and the same environment is used by default for the repository and the target database. + +## Object Structure Migration + +- **Structure Migration**: You need to create the target database in the prepared MogDB/openGauss environment and migrate the data structures from the source database to the newly created target database. 
+- **Data Migration**: If you want to perform SQL performance comparison evaluation, then in addition to migrating the database structure, you also need to migrate the table data to ensure that the source MySQL and the target MogDB/openGauss database data size is the same for the performance comparison to be meaningful.
+
+Related Tool:
+
+- **MTK**: Structure and data migration can be done by MTK, for details please refer to [https://mogdb.io/mtk](https://mogdb.io/mtk)
+
+## MySQL Data Collection
+
+Currently MySQL source database SQL collection relies on slow logs (either to log files, or to database tables) to collect all SQL recorded in the slow logs at once.
+
+To avoid SQL misses, you must ensure that the entire business cycle has been tested and executed or covered.
+
+### Required Permissions
+
+The permissions required for the collection user are as follows.
+
+> root
+
+### Enable Slow SQL Logging
+
+- You need to enable slow logging in advance to ensure that the complete business cycle data is recorded in the slow log, it is recommended to enable slow logging one week in advance
+- Set slow SQL threshold interval to 0
+
+```sql
+SET GLOBAL slow_query_log=ON;
+SET GLOBAL long_query_time=0.001;
+set global log_output='FILE'; -- log_output='TABLE' is also supported, but the content is relatively small
+```
+
+### Data Collection
+
+With slow logging enabled, run a complete business cycle so that all business SQL is captured in the slow log.
+
+It is recommended to execute the data collection command after running a full business cycle and full process business is done with slow logging enabled.
+
+```shell
+./sca_linux_x86_64 -T MC -h -p -n -u -e
+
+# Command options:
+# -h/p/t/u/e specify the connection method of the source MySQL database
+```
+
+### Collection Results
+
+When the collection is complete, a zip packet is generated which can be copied to the target database and unpacked into a data directory.
+
+Subsequent compatibility analysis relies on the data in this packet and does not require another connection to the source MySQL database.
+
+## Repository Initialization
+
+```shell
+# Use the MogDB/openGauss user with administrator privileges for repository initialization
+# Repository initialization will create a database with the name sca_db by default
+# You can specify the repository name by -N, the repository user name by -U, and the repository user password by -E
+
+./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password
+```
+
+## Perform Analysis Tasks
+
+### Compatibility Analysis
+
+MySQL database currently only supports SQL compatibility analysis with the following command.
+
+```shell
+# If the repository name, username, password are not the default, you need to use the -N, -U, -E options to specify
+# If the target database information is different from the repository, use the -h, -p, -n, -u, -e options to specify
+
+./sca_linux_x86_64 -T MI -H -P -n -d
+```
+
+When the analysis is complete, a report will be generated in the directory specified by **-d**, which can be downloaded offline for viewing.
diff --git a/product/en/docs-sca/v5.1/oracle_to_mogdb.md b/product/en/docs-sca/v5.1/oracle_to_mogdb.md
new file mode 100644
index 00000000..f897857c
--- /dev/null
+++ b/product/en/docs-sca/v5.1/oracle_to_mogdb.md
@@ -0,0 +1,105 @@
+---
+title: Oracle to MogDB Evaluation
+summary: Oracle to MogDB Evaluation
+author: Hong Rihua
+date: 2022-03-07
+---
+
+# Oracle to MogDB Evaluation
+
+## MogDB Environment Preparation
+
+You need to prepare your own MogDB/openGauss database environment for the repository and the target database, and the same environment is used by default for the repository and the target database.
+ +## Object Structure Migration + +- **Structure Migration**: You need to create the target database in the prepared MogDB/openGauss environment and migrate the data structures from the source database to the newly created target database. +- **Data Migration**: If you want to perform SQL performance comparison evaluation, then in addition to migrating the database structure, you also need to migrate the table data to ensure that the source Oracle and the target MogDB/openGauss database data size is the same for the performance comparison to be meaningful. + +Related Tool: + +- **MTK**: Structure and data migration can be done by MTK, for details please refer to [https://mogdb.io/mtk](https://mogdb.io/mtk) + +## Oracle Data Collection + +Currently Oracle's data collection is a continuous SQL collection from the cache. By default, the collection process is performed periodically (every 10 minutes) for a total of one week of cached SQL data. + +### Required Permissions + +The permissions required for the collection user are as follows. + +> SELECT ANY DICTIONARY +> +> CREATE PROCEDURE +> +> EXECUTE ON DBMS_LOB +> +> SELECT ON GV_$SQL_PLAN + +### Related Commands + +```shell +./sca_linux_x86_64 -T OC -s SCOTT -h -p -n -u -e + +# Command options: +# -s specify the list of Schema to be collected +# -h/p/t/u/e specify the connection method of the source Oracle database +# Considering that the SQL content in the shared cache may not be comprehensive, the collection will be continuous and incremental, with one week of data collected by default. If you are only doing functional validation, you can use the -q 0.001 -Q 60 option, i.e. only collect SQL data once. +``` + +### Collection Results + +When the collection is complete, a zip packet is generated which can be copied to the target database and unpacked into a data directory. + +Subsequent compatibility analysis relies on the data in this packet and does not require another connection to the source Oracle database. 
+ +## Repository Initialization + +```shell +# Use the MogDB/openGauss user with administrator privileges for repository initialization +# Repository initialization will create a database with the name sca_db by default +# You can specify the repository name by -N, the repository user name by -U, and the repository user password by -E + +./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password +``` + +## Perform Analysis Tasks + +### Compatibility Analysis + +To do SQL compatibility analysis only, use the following command. + +```shell +# If the repository name, user name, password are not the default, you need to use the -N, -U, -E options to specify +# If the target database information is different from the repository, use the -h, -p, -n, -u, -e options to specify + +./sca_linux_x86_64 -T OI -H -P -n -d +``` + +When the analysis is complete, a report will be generated in the directory specified by **-d**, which can be downloaded offline for viewing. + +### Performance Comparison Analysis + +To do SQL performance comparison analysis only, use the following command. + +```shell +# If the repository name, user name, password are not the default, you need to use the -N, -U, -E options to specify +# If the target database information is different from the repository, use the -h, -p, -n, -u, -e options to specify + +./sca_linux_x86_64 -T OS -H -P -n -d +``` + +When the analysis is complete, a report will be generated in the directory specified by **-d**, which can be downloaded offline for viewing. + +### Compatibility Analysis + Performance Comparison Analysis + +If you need to do both SQL compatibility analysis and SQL performance comparison, use the following command. 
+
+```shell
+# If the repository name, user name, password are not the default, you need to use the -N, -U, -E options to specify
+# If the target database information is different from the repository, use the -h, -p, -n, -u, -e options to specify
+
+./sca_linux_x86_64 -T OIS -H -P -n -d
+```
+
+When the analysis is complete, a report will be generated in the directory specified by **-d**, which can be downloaded offline for viewing.
diff --git a/product/en/docs-sca/v5.1/overview.md b/product/en/docs-sca/v5.1/overview.md
new file mode 100644
index 00000000..568ea62e
--- /dev/null
+++ b/product/en/docs-sca/v5.1/overview.md
@@ -0,0 +1,58 @@
+---
+title: Overview of SCA
+summary: Overview of SCA
+author: hongyedba
+date: 2021-09-30
+---
+
+# Overview of SCA
+
+SCA (SQL Compatibility Analyzer) is an SQL compatibility and performance evaluation tool for heterogeneous databases prior to migration.
+
+SCA is used to assess the compatibility of heterogeneous data before migration, to assess whether the actual business SQL in the source database has syntax problems in the target database, and to assess the performance differences between the actual execution of SQL in the two heterogeneous databases.
+
+SCA currently supports four source databases, including: Oracle, MySQL, DB2, PostgreSQL.
+
+**Applicable to**: MogDB and other openGauss-based databases
+
+SCA service program can run the following 10 types of tasks:
+
+1. `I [Initialize]`: **Initialize**, used to initialize the SCA repository
+2. `L [Apply License]`: **Apply license**, task for license application
+3. `OC [Oracle Collection]`: **Oracle collection**, used to collect the business SQL executed in Oracle database, need long time to collect
+4. `MC [MySQL Collection]`: **MySQL collection**, used to collect the business SQL executed in MySQL database, need to configure the slow log in advance, and then collect at once
+5. 
`DC [DB2 Collection]`: **DB2 collection**, used to collect the business SQL executed in DB2 database, need long time to collect
+6. `OI [Oracle Inspection]`: **Oracle compatibility evaluation**, used to evaluate the actual compatibility of business SQL collected in Oracle on the source side in MogDB on the target side
+7. `MI [MySQL Inspection]`: **MySQL compatibility evaluation**, used to evaluate the actual compatibility of business SQL collected in MySQL on the source side in MogDB on the target side
+8. `DI [DB2 Inspection]`: **DB2 compatibility evaluation**, used to evaluate the actual compatibility of business SQL collected in DB2 on the source side in MogDB on the target side
+9. `OS [Oracle Simulation]`: **Oracle performance evaluation**, used to evaluate the execution performance of business SQL collected in Oracle on the source side in MogDB on the target side
+10. `OIS [Oracle Inspection & Simulation]`: **Oracle compatibility and performance evaluation**, equivalent to OI + OS two tasks at the same time
+
+Note:
+
+- Compatibility assessment requires a complete source database structure. It is recommended that MTK is used for migration of the source database structure. SCA can also be used to automatically create a target test database for test purposes.
+- SQL simulation requires a complete source database structure and data. It is recommended that MTK is used for migration of the source database structure and data.
+
+## Supported OSs and Platforms
+
+SCA supports the following OSs and platforms currently:
+
+1. Linux x86_64
+2. Linux arm64 (ARM platform does not support DB2 data collection because it does not have a DB2 client)
+3. MacOS (MacOS does not support the SHA256 encryption authentication mode of openGauss/MogDB.)
+
+## Supported Databases
+
+SCA supports the following source and target databases currently:
+
+1. Source database:
+   - Oracle >= 10.2
+   - MySQL >= 5.5
+   - DB2 >= 11.5
+   - PostgreSQL >= 9
+   - File: MySQL slow-log
+2. 
Target database:
+   - MogDB/openGauss >= 2.0
+   - PostgreSQL >= 13.0
+
+**Note**: Whether SCA supports a platform does not have relevance to whether SCA supports a database. A database running on other platforms, such as Windows/AIX can be supported by SCA only when the host network and port are connected between the database and SCA.
diff --git a/product/en/docs-sca/v5.1/pg_to_mogdb.md b/product/en/docs-sca/v5.1/pg_to_mogdb.md
new file mode 100644
index 00000000..f1127f8e
--- /dev/null
+++ b/product/en/docs-sca/v5.1/pg_to_mogdb.md
@@ -0,0 +1,74 @@
+---
+title: PostgreSQL to MogDB Evaluation
+summary: PostgreSQL to MogDB Evaluation
+author: Hong Rihua
+date: 2022-04-19
+---
+
+# PostgreSQL to MogDB Evaluation
+
+## MogDB Environment Preparation
+
+You need to prepare your own MogDB/openGauss database environment for the repository and the target database, and the same environment is used by default for the repository and the target database.
+
+## Object Structure Migration
+
+- **Structure Migration**: You need to create the target database in the prepared MogDB/openGauss environment and migrate the data structures from the source database to the newly created target database.
+
+- **Data Migration**: If you want to perform SQL performance comparison evaluation, then in addition to migrating the database structure, you also need to migrate the table data to ensure that the source PostgreSQL and the target MogDB/openGauss database data size is the same for the performance comparison to be meaningful.
+
+Related Tool:
+
+- **MTK**: Structure and data migration can be done by MTK, for details please refer to [https://mogdb.io/mtk](https://mogdb.io/mtk)
+
+## PostgreSQL Data Collection
+
+Currently, the PostgreSQL source SQL collection relies on the pg_stat_statements plugin, which collects all SQL recorded in the plug-in table at once.
+
+To avoid SQL misses, it is recommended to adjust the maximum number of SQL retained in the plugin and ensure that the entire business cycle is tested and executed or covered.
+
+### Required Permissions
+
+The permissions required for the collection user are as follows.
+
+> superuser (for querying the data dictionary and pg_stat_statements plug-in table)
+
+### Related Commands
+
+```shell
+./sca_linux_x86_64 -T PC -h -p -n -u -e
+
+# Command options:
+# -h/p/t/u/e specify the connection method of the source PostgreSQL database
+```
+
+### Collection Results
+
+When the collection is complete, a zip packet is generated which can be copied to the target database and unpacked into a data directory.
+
+Subsequent compatibility analysis relies on the data in this packet and does not require another connection to the source PostgreSQL database.
+
+## Repository Initialization
+
+```shell
+# Use the MogDB/openGauss user with administrator privileges for repository initialization
+# Repository initialization will create a database with the name sca_db by default
+# You can specify the repository name by -N, the repository user name by -U, and the repository user password by -E
+
+./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password
+```
+
+## Perform Analysis Tasks
+
+### Compatibility Analysis
+
+PostgreSQL database currently only supports SQL compatibility analysis with the following command.
+
+```shell
+# If the repository name, username, password are not the default, you need to use the -N, -U, -E options to specify
+# If the target database information is different from the repository, use the -h, -p, -n, -u, -e options to specify
+
+./sca_linux_x86_64 -T PI -H -P -n -d
+```
+
+When the analysis is complete, a report will be generated in the directory specified by **-d**, which can be downloaded offline for viewing.
diff --git a/product/en/docs-sca/v5.1/release-notes.md b/product/en/docs-sca/v5.1/release-notes.md
new file mode 100644
index 00000000..942ac548
--- /dev/null
+++ b/product/en/docs-sca/v5.1/release-notes.md
@@ -0,0 +1,87 @@
+---
+title: Release Notes
+summary: Release Notes
+author: hongyedba
+date: 2021-09-30
+---
+
+# Release Notes
+
+## v5.1.0
+
+- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.1.0/sca_linux_arm64)
+- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.1.0/sca_linux_x86_64)
+- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.1.0/sca_macos_x86_64)
+- [Sample Report](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/SCA_sample_report_v5.0.zip)
+
+### Features
+
+- Add source database support: PostgreSQL, whose SQL data is collected from the pg_stat_statements plugin
+- Add object information collection: All source databases will automatically collect field information and PLSQL parameter information, and automatically create them for use in SQL compatibility audits.
+
+### Bugfixes
+
+- Fix the error reporting problem that the progress bar output will be abnormal due to the queue problem in some cases
+- Fix the problem that performance_schema is not recognized as SYSTEM_CATALOG during MySQL collection
+- Fix the problem that the system built-in objects are not filtered when MySQL object information is collected
+- Fix the error caused by the -D option after replacing the openGauss driver due to a type binding problem
+- Fix the problem of failing to verify the expiration time of license when verifying license.
+- Fix the problem that the SQL text contains the line starting with **#** in the slow log file, which causes the error in parsing.
+- Fix the problem of abnormal sorting of table data when the source database is Oracle in the SQL compatibility summary page.
+- Fix the problem that offline report cannot be displayed in some browsers due to JS problem.
+- Fix the abnormal exit caused by the failure to parse ID data when MySQL slow log collection. +- Fix the problem that the **--debug** option is not recognized and requires specified value. +- Fix the problem of Oracle data collection character set error reported in some scenarios ORA-29275 (character set conversion + splicing empty string) +- Fix the error reported in Oracle's report for source database due to the GaussDB database does not have median function, resulting in complex data query. + +### Improvements + +- Set **search_path** to include all schema after connecting to the target database to reduce the probability of not finding tables in SQL compatibility analysis process. +- Add SQL rewriting rules, add some SQL rewriting rules for Oracle database. +- Adjust the detailed SQL list in csv format, use Chinese table header and part of Chinese content, add support category field, easy to read and understand. +- Adjust the logic of SQL compatibility summary page data, NULL related rewriting is not considered as rewriting support. +- Adjust the MySQL data acquisition logic, strictly follow the FILE/TABLE setting in log_output to get data. +- Driver and supporting code adjustment, use psycopg2 driver for PostgreSQL database, use py_opengauss driver for openGauss database. 
+- Remove some redundant format_size functions and their calls + +## v5.0.0 + +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/sca_macos_x86_64) +- [sample_report](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/SCA_sample_report_v5.0.zip) + +### Feature + +- Code logic refactoring, command options and usage have changed considerably +- Support MySQL connecting database collection, through the database table of slow log information, or through the server-side local slow log +- Support DB2 data collection, collection mode is similar to Oracle, need to run for a long time, the default collection time is one week +- Supports DB2 SQL compatibility analysis, provided that the target MogDB database needs to be prepared in advance +- Support for export of Oracle SQL detail list files (sql_detail_list.csv) +- Adjust Oracle data collection logic, remove unnecessary object information and system configuration information +- Adjust the repository structure, only keep the basic information of the source database, SQL compatibility audit, SQL performance comparison and other related structures + +## v4.1.0 + +- [SCA Usage_v4.1.0.pdf](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/SCA使用说明_v4.1.0.pdf) +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/sca_macos_x86_64) + +### Feature + +- Support SQL compatibility analysis of the MySQL database based on the slow log and general log. +- Support customization of SQL conversion rules. 
+ +## v4.0.0 + +- [SCA Usage_v4.0.0.pdf](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/SCA使用说明_v4.0.0.pdf) +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/sca_macos_x86_64) + +### Feature + +- Integrate a newly designed offline analysis report. +- Support data collection, compatibility analysis, and performance assessment of a single executable program. +- Support the SHA256 encryption authentication mode of openGauss/MogDB in the Linux OS. diff --git a/product/en/docs-sca/v5.1/result.md b/product/en/docs-sca/v5.1/result.md new file mode 100644 index 00000000..bb21919d --- /dev/null +++ b/product/en/docs-sca/v5.1/result.md @@ -0,0 +1,182 @@ +--- +title: SCA Result +summary: SCA Result +author: hongyedba +date: 2021-09-30 +--- + +# SCA Result + +The SCA result includes: + +- **Collection result**: The data collection result in the source database (Oracle) will be automatically packaged into a ZIP file, and a clear file position prompt will be provided at the end of the collection. +- **Analysis result**: An analysis report is generated after the analysis is complete in the target MogDB/openGauss database. The report is stored in an independent folder, in the offline HTML format, and can be copied for reading at will. + +## Collection Result + +The data collection result is automatically packaged into a ZIP file and stored in the current directory of the program by default. 
+ +The collection result prompts the following information: + +``` +2022-02-15 19:20:40.301126 INFO [runMe.py:356] +==================== [ Summary Information ] ====================+ +2022-02-15 19:20:40.301184 INFO [runMe.py:357] | Task Name File Name File Size | +2022-02-15 19:20:40.301222 INFO [runMe.py:358] | --------------------- ------------------------------ ---------- | +2022-02-15 19:20:40.301260 INFO [runMe.py:360] | SCA_SESSION_SQL sca_sql_information.dat 3.65 KB | +2022-02-15 19:20:40.301294 INFO [runMe.py:360] | SCA_SESSION_SQL_PERF sca_sql_performance.dat 3.29 KB | +2022-02-15 19:20:40.301326 INFO [runMe.py:360] | SCA_MYSQL_USER_HOST sca_mysql_user_host.dat 1815 B | +2022-02-15 19:20:40.301357 INFO [runMe.py:360] | SCA_DATABASE sca_database.dat 163 B | +2022-02-15 19:20:40.301387 INFO [runMe.py:361] +=================================================================+ + + >>> Final Result is: + >>> ---------------------------------------------- + >>> /Users/hongyedba/Desktop/SCA_MySQL_test.zip +``` + +## Analysis Report + +Both compatibility analysis and SQL performance simulation will generate an analysis report, which are stored in specified data directories by default. You can also use `-r` to specify the output directory of the report. + +### Report Entry + +In the analysis report directory, you can click `index.html` to view the analysis report through the default browser Google Chrome. + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-1.png) + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-2.png) + +### SQL Compatibility Summary + +The SQL compatibility summary page shows the compatibility analysis result. The table lists all SQLs collected from the system and whether they are supported in MogDB by user name, program name, and module name. + +Note: The content of this table varies slightly from database to database. 
+ +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-3.png) + +### SQL Rewrite Rule + +The SQL rewrite rule page shows the SQL rewrite rules involved in the analysis. + +The usage field shows the trigger situation of a rule. + +- Match indicates the number of rule hits in a SQL. +- Count indicates the number of SQLs that match a rule. + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-4.png) + +### SQL Complexity Distribution + +The SQL complexity distribution page shows the SQL complexity distribution. +The current judgment standard of complexity distribution is as follows: + +1. The more the number of tables that a SQL involves, the higher the complexity. +2. The more the number of times for using the connect by syntax in a SQL, the higher the complexity. In this situation, the execution performance problem occurs probably. +3. The more the number of user-defined functions used in a SQL, the higher the complexity. Because the logic complexity in a user-defined function is not clear, the SQL complexity is high if many user-defined functions are used in a SQL. +4. The longer the time taken for executing a function in Oracle, the higher the SQL complexity. + +The complexity of each SQL is determined according to the above four standards. If the SQL complexity is higher, the SQL execution performance needs to be paid more attention to after migration. Only in this way can service faults be avoided due to performance problems. + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-5.png) + +### SQL Performance Comparison (Performance Comparison Summary) + +The SQL performance comparison summary page shows the following information: + +1. Basic performance comparison information, basic configuration related to performance comparison, and related thresholds used in comparison +2. 
SQL performance is summarized by such dimensions as general, up, down, not supported, and timeout to analyze the impact of all kinds of SQLs on the overall workload. + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-6.png) + +### SQL Performance Comparison (Top by Workload/SQL, Timeout) + +In SQL performance comparison, the content formats of the Top by Workload, Top by SQL, and Timeout pages are similar. The following uses the Top by Workload page as an example. +The table lists 100 SQLs that affect the performance most. The SQL FMS field uses the hyperlink format, which can be clicked to view the SQL analysis details. +The SQL performance impact can be assessed from two dimensions: + +1. SQL impact: the proportion of SQLs that has performance impact when single SQL is executed +2. Load impact: the impact of the current SQL on the performance change of the overall SQL workload and the overall SQL performance in terms of the total number of its execution times. + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-7.png) + +### SQL Performance Comparison (SQL Details) + +The SQL details page shows the following information: + +1. SQL execution information: SQL execution information in Oracle and MogDB + + Oracle execution information is from the dynamic performance view, and MogDB execution information is from actual SQL execution. + +2. SQL text: SQL execution text in Oracle and actual execution text in MogDB + +3. SQL binding variable: SQL binding variable in Oracle + + The binding variable will be applied to the SQL execution text in MogDB to simulate service execution in MogDB. + +4. Oracle execution plan: SQL execution plan in Oracle, which is from the dynamic performance view + +5. MogDB execution plan: SQL execution plan in MogDB, which is from actual execution. + + The program automatically analyzes the MogDB execution plan and marks out the potential performance problems. + +6. 
MogDB object information: structures and statistics of SQL-involved objects in MogDB + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-8.png) + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-9.png) + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-10.png) + +### Supported SQL List + +In the root directory where the report is located, the `sql_detail_list.csv` file records all supported SQLs involved in the SQL compatibility evaluation, automatic rewriting policies of partial SQLs during the evaluation, and rewritten SQL script. + +The content of the fields in the file varies slightly, mainly depending on the database. + +#### Oracle + +1. **user name**: specifies the schema of SQLs to be executed. In Oracle, this field refers to the user. +2. **sql type**: specifies the SQL type. You need to make it clear whether the SQLs collected are system SQLs or service SQLs. The values are: USER, SYSTEM_CATALOG, SYSTEM_COMMAND. +3. **module**: specifies the client module for executing SQL statements. +4. **action**: specifies the client action for executing SQL statements. +5. **mogdb error code**: specifies the SQL execution error code in MogDB. +6. **mogdb error message**: specifies the SQL execution error message in MogDB. +7. **support category**: specifies SQL support category in MogDB: direct support, rewrite support, no support +8. **original sql**: specifies the original SQL script. +9. **transform rules**: specifies the automatic rewriting rules of SQLs. +10. **sql rewrite**: specifies the rewritten SQL script. + +#### MySQL + +1. **database name (schema)**: specifies the schema of SQLs to be executed. In MySQL, this field refers to the database. +2. **sql type**: specifies the SQL type. You need to make it clear whether the SQLs collected are system SQLs or service SQLs. The values are: USER, SYSTEM_CATALOG, SYSTEM_COMMAND. +3. 
**user host**: specifies the host information of the MySQL client user who executes SQLs and the corresponding execution client. +4. **mysql error code**: specifies the SQL execution situation in MySQL. If the execution result is 0, the SQL execution is successful. Otherwise, the execution error is reported. +5. **mogdb error code**: specifies the SQL execution error code in MogDB. +6. **mogdb error message**: specifies the SQL execution error message in MogDB. +7. **support category**: specifies SQL support category in MogDB: direct support, rewrite support, no support +8. **original sql**: specifies the original SQL script. +9. **transform rules**: specifies the automatic rewriting rules of SQLs. +10. **sql rewrite**: specifies the rewritten SQL script. + +#### DB2 + +1. **schema**: specifies the schema of SQLs to be executed. +2. **sql type**: specifies the SQL type. You need to make it clear whether the SQLs collected are system SQLs or service SQLs. The values are: USER, SYSTEM_CATALOG, SYSTEM_COMMAND. +3. **statement type**: specifies the types of SQL statements recorded in DB2. +4. **mogdb error code**: specifies the SQL execution error code in MogDB. +5. **mogdb error message**: specifies the SQL execution error message in MogDB. +6. **support category**: specifies SQL support category in MogDB: direct support, rewrite support, no support +7. **original sql**: specifies the original SQL script. +8. **transform rules**: specifies the automatic rewriting rules of SQLs. +9. **sql rewrite**: specifies the rewritten SQL script. + +#### PostgreSQL + +1. **schema**: specifies the schema of SQLs to be executed. +2. **sql type**: specifies the SQL type. You need to make it clear whether the SQLs collected are system SQLs or service SQLs. The values are: USER, SYSTEM_CATALOG, SYSTEM_COMMAND. +3. **mogdb error code**: specifies the SQL execution error code in MogDB. +4. **mogdb error message**: specifies the SQL execution error message in MogDB. +5. 
**support category**: specifies SQL support category in MogDB: direct support, rewrite support, no support +6. **original sql**: specifies the original SQL script. +7. **transform rules**: specifies the automatic rewriting rules of SQLs. +8. **sql rewrite**: specifies the rewritten SQL script. diff --git a/product/en/docs-sca/v5.1/toc.md b/product/en/docs-sca/v5.1/toc.md new file mode 100644 index 00000000..9c1e9640 --- /dev/null +++ b/product/en/docs-sca/v5.1/toc.md @@ -0,0 +1,16 @@ + + +# Documentation + +## SCA Documentation + ++ [Overview](/overview.md) ++ [Usage](/usage.md) ++ [Command Options](/command_options.md) ++ [Result](/result.md) ++ [Release Notes](/release-notes.md) ++ Usage Examples + + [Oracle to MogDB Evaluation](/oracle_to_mogdb.md) + + [MySQL to MogDB Evaluation](/mysql_to_mogdb.md) + + [DB2 to MogDB Evaluation](/db2_to_mogdb.md) + + [PostgreSQL to MogDB Evaluation](/pg_to_mogdb.md) \ No newline at end of file diff --git a/product/en/docs-sca/v5.1/usage.md b/product/en/docs-sca/v5.1/usage.md new file mode 100644 index 00000000..917185ab --- /dev/null +++ b/product/en/docs-sca/v5.1/usage.md @@ -0,0 +1,70 @@ +--- +title: SCA Usage +summary: SCA Usage +author: hongyedba +date: 2021-09-30 +--- + +# SCA Usage + +## Deployment Procedure + +SCA is already packaged as a binary executable, no additional deployment operations are required. + +SCA can be directly used only when the database can be connected through the network after the program file is uploaded. + +## Common Commands + +**Note**: The following uses the Linux OS of the x86-64 architecture as an example. For other ARM platforms or the MacOS system, the commands need to be modified accordingly. + +* Apply for a License (when the software is run for the first time, you need to apply for a license online first) + +```shell +# You need to enter the user email during the run +# The requested license data will be sent to the entered email address. 
+# Copy the license data and write it to the license.json file in the same directory as the SCA +./sca_linux_x86_64 -T L +``` + +* Initialize the repository (for the same target MogDB database, you only need to initialize the repository on the first run) + +```shell +./sca_macos_x86_64 -T i -H -P -N -U -E --user --password +``` + +* Oracle data collection (no repository required) + +```shell +# For test, you can add the following parameters to speed up SQL collection: -q 0.001 -Q 60 -m off +./sca_linux_x86_64 -T OC -s SCOTT -h -p -n -u -e +``` + +* Slow log collection from specified MySQL + +```shell +./sca_linux_x86_64 -T MC -d --slow-log= +``` + +* Automatic collection from a specified MySQL server + +```shell +./sca_linux_x86_64 -T MC -d -h -p -n -u -e +``` + +* Perform Oracle compatibility evaluation (repository is initialized and repository login information is default) + +```shell +./sca_linux_x86_64 -T OI -d -n +``` + +* SQL simulation only (repository is initialized and repository user and password information is default) + +```shell +./sca_linux_x86_64 -T OS -h -p -n -d +``` + +* Simultaneous SQL compatibility and performance evaluation (repository initialized) + +```shell +./sca_linux_x86_64 -T OIS -h -p -n -u -e -d +``` diff --git a/product/zh/docs-sca/v5.1/command_options.md b/product/zh/docs-sca/v5.1/command_options.md new file mode 100644 index 00000000..7b8246e4 --- /dev/null +++ b/product/zh/docs-sca/v5.1/command_options.md @@ -0,0 +1,319 @@ +--- +title: SCA 使用说明 +summary: SCA 使用说明 +author: Hong Rihua +date: 2022-03-07 +--- + +# SCA 命令行选项 + +SCA 为纯命令行工具,所有功能均可通过单一的可执行程序完成。 + +## 通用选项 + +通用选项在各类任务中均有可能会使用。 + +### --help (帮助信息) + +在使用分析程序前,建议仔细阅读帮助信息(命令: `./sca_linux_x86_64 --help`): + +```shell +Introduction: + SCA is a tool used to do SQL life-cycle inspection and simulation + when migration from heterogeneous database to MogDB. + Source databases supported as below: + 1. Oracle : SQL Inspection, Simulation + 2. DB2 : SQL Inspection + 3. 
MySQL : SQL Inspection + 4. PostgreSQL : SQL Inspection + +Options: + --[ Overall ]-- + --help : Show help message + -v, --version : Show SCA version + -T, --type : Run type: + : I = Init SCA repository + : L = Apply for license + : ----------------[Analysis: Target required]--------------- + : OI = Oracle Inspection, this is default type + : MI = MySQL Inspection + : DI = DB2 Inspection + : PI = PostgreSQL Inspection + : OS = Oracle Simulation, just like Oracle SPA + : OIS = Oracle Inspection & Simulation + : ------------[Collection: No need of target DB]------------ + : OC = Oracle Collection + : MC = MySQL Collection + : DC = DB2 Collection + : PC = PostgreSQL Collection + -d, --data : Unzipped data directory for analyzer, or directory for collection + -D, --data-id : Use data with data id existed in the repository + -w, --workers : Parallel workers for tasks, default: 10 + -x, --debug : Enable debug mode + -l, --logfile : Output to both logfile (without progress-bar) and screen (with progress-bar) + -L, --log-only : Output to only logfile (without progress-bar) + -F, --force : Force mode in REPO Creation, drop old objects before create it + -r, --report : Final report file location, default in data directory with name 'report' + --license : License file, default is [./license.json] + --sql-transformer : Regular rules for SQL transformation (for internal use) + : Format: [{"name": "xxx" + : "source": "xxx", + : "target": "xxx", + : "comment": "xxx"}, ...] 
+ + --[ Repository Connection ]-- + -H, --repo-host : Repository DB Server host address, default: 127.0.0.1 + -P, --repo-port : Repository DB server port, default: 5432 + -N, --repo-name : Repository database, default: sca_db + -U, --repo-user : Repository user, default: sca_repo + -E, --repo-password : Repository password, default: SCA@password + --user : Administrator used to create repository DB and user, default: mogdb + --password : Password for Administrator, default: mogdb + + --[ Source & Target Connection ]-- + -h, --db-host : Source & Target DB Server host address, default same as -H + -p, --db-port : Source & Target DB server port, default same as -P + -n, --db-name : Source & Target database, default same as -N + -u, --db-user : Source & Target user, default same as -U + -e, --db-password : Source & Target password, default same as -E + --target-type : Target database type in analysis tasks, default: MOGDB + : Valid type: ORACLE, MOGDB, OPENGAUSS, POSTGRESQL, MYSQL, DB2 + + --[ Collection Options ]-- + -q, --sql-days : How many days for session sql data, default: 7 + -Q, --sql-interval : SQL collect interval in seconds, default: 600 + -s, --schema-include : Users/Schemas included in data collection, default: '' + -S, --schema-exclude : Users/Schemas excluded in data collection + : Default: <> + -m, --enable-monitor : Starting background monitor process in SQL Collection + : Valid values: 1/on/true/t = ENABLE, default: on + : 0/off/false/f = DISABLE + --slow-log : MySQL slow-log for client data collection + --sql-csv : SQL file in csv format for SQL Inspection (@todo) + +Usage: + 0. Apply for license + ./sca_macos_x86_64 -T L + 1. Init repository (used for first running) + ./sca_macos_x86_64 -T i -H -P -N -U -E --user --password + 2. 
Oracle data collection + # Notice: "-q 0.001 -Q 60" means gather Session SQL only once + # "-m off" means do not monitor system status (CPU Idle and Disk Free) + ./sca_macos_x86_64 -T OC -s SCOTT -h -p -n '' -u -e -q 0.001 -Q 60 -m off + ./sca_macos_x86_64 -T OC -s SCOTT -h -p -n '' -u -e + 3. MySQL data collection using slow-log file + ./sca_macos_x86_64 -T MC -d --slow-log= + 4. Oracle SQL compatible analysis (Required: Repository, Target DB) + Note: use [H/P/N/U/E] options to assign the repository + use [h/p/n/u/e] options to assign the target database + ./sca_macos_x86_64 -T OI -d -n + 5. Oracle SQL performance simulation (Required: Repository, Target DB) + Note: use [H/P/N/U/E] options to assign the repository + use [h/p/n/u/e] options to assign the target database + ./sca_macos_x86_64 -T OS -d -n + 6. MySQL SQL compatible analysis (Required: Repository, Target DB) + Not e: use [H/P/N/U/E] options to assign the repository + use [h/p/n/u/e] options to assign the target database + ./sca_macos_x86_64 -T MI -d -h -p -n +``` + +### -v, --version (查看版本) + +查看当前 SCA 的版本信息: + +```shell +hongyedba@localhost ~ % ./sca_linux_x86_64 -v +SCA version: 5.1.0 +``` + +### -T, --type (任务类型) + +默认值: OI + +指定任务类型,目前 SCA 支持如下类型的任务: + +1. `I [Initialize]`: **初始化**,用于初始化 SCA 资料库 +2. `L [Apply License]`: **申请 License**,用于 License 申请的任务 +3. `OC [Oracle Collection]`: **Oracle采集**,用于采集 Oracle 数据库中执行过的业务 SQL,需要长时间采集 +4. `MC [MySQL Collection]`: **MySQL采集**,用于采集 MySQL 数据库中执行过的业务 SQL,需提前配置慢日志,然后一次性采集 +5. `DC [DB2 Collection]`: **DB2采集**,用于采集 DB2 数据库中执行过的业务 SQL,需要长时间采集 +6. `OI [Oracle Inspection]`: **Oracle兼容评估**,用于评估源端 Oracle 中采集到的业务 SQL,在目标端 MogDB 中的实际兼容性 +7. `MI [MySQL Inspection]`: **MySQL兼容评估**,用于评估源端 MySQL 中采集到的业务 SQL,在目标端 MogDB 中的实际兼容性 +8. `DI [DB2 Inspection]`: **DB2兼容评估**,用于评估源端 DB2 中采集到的业务 SQL,在目标端 MogDB 中的实际兼容性 +9. `OS [Oracle Simulation]`: **Oracle性能评估**,用于评估源端 Oracle 中采集到的业务 SQL,在目标端 MogDB 中的执行性能 +10. 
`OIS [Oracle Inspection & Simulation]`: **Oracle兼容和性能评估**,等同于 OI + OS 两种任务同时进行 + +### -d, --data (数据目录) + +除了资料库初始化(-T I)之外的所有任务类型,都可以指定 -d 数据目录。 + +采集任务会将采集到的数据写入到 -d 指定的数据目录。 + +分析任务会从数据目录中读取数据,将数据插入到资料库表中,最终生成的报告结果默认也会写入到数据目录下。 + +### -D, --data-id (数据编号) + +指定数据编号,然后直接从资料库读取指定编号的数据,而不是重新从数据目录中读取并加载数据。 + +指定 -D 选项后,会跳过加载数据的步骤,直接执行相关的分析任务。 + +### -w, --workers (并发度) + +默认值: 10 + +指定用于运行任务的并发度大小,适量的并发度有助于提高各个任务的运行速度。 + +并发度用于: 文件数据加载到资料库,SQL兼容度评估,SQL复杂度评估,SQL性能模拟等操作中。 + +**注意**: + +* 在SQL模拟任务中,并发度越大,可能导致单条SQL的执行效率下降,需要依据实际生产环境的负载压力选取合理的并发度,通常设置为生产库的平均活跃会话数。 + +### -x, --debug (Debug模式) + +开启 Debug 模式,不会影响正常的分析逻辑,但是会输出大量的日志,通常用于辅助分析程序本身的运行异常。 + +### -l, --logfile (日志文件) + +指定程序日志文件,程序输出会同时输出到终端命令行与日志文件中。 + +### -L, --log-only (仅日志文件) + +指定程序日志文件,程序输出会仅输出到日志文件中。 + +### -F, --force (强制模式) + +是否启用强制模式,强制模式会在资料库初始化场景下生效。 + +在资料库初始化过程中,如果启用强制模式,会先删除资料库,然后重新创建。 + +### -r, --report (报告目录) + +默认值: /report + +指定报告目录,这里的报告包括 `兼容评估` 报告和 `SQL模拟` 报告,都是 html 格式报告,可离线查看。 + +同时,对于 `兼容评估` 任务,还会产生 `sql_detail_list.csv` 文件,记录所有 SQL 的评估结果,以及可能存在的改写方案。 + +### --license (License文件) + +默认值: ./license.json + +License 文件的位置。 + +### --sql-transformer (SQL转换规则) + +【高级用法】 指定从源库到目标库的 SQL 转换规则,使用正则匹配进行转换。 + +## 资料库选项 + +### -H, --repo-host (资料库IP地址) + +默认值: 127.0.0.1 + +资料库数据库 IP 地址。 + +### -P, --repo-port (资料库端口) + +默认值: 5432 + +资料库数据库端口。 + +### -N, --repo-name (资料库名称) + +默认值: sca_db + +资料库数据库名称。 + +### -U, --repo-user (资料库用户) + +默认值: sca_repo + +资料库数据库登录用户。 + +### -E, --repo-password (资料库密码) + +默认值: SCA@password + +资料库数据库登录密码。 + +### --user (管理员用户) + +资料库管理员用户,用于初始化资料库过程中,创建资料库用户,和资料库数据库。 + +### --password (管路员密码) + +资料库管理员用户登录密码,用于初始化资料库过程中,创建资料库用户,和资料库数据库。 + +## 源或目标库连接 + +### -h, --db-host (源或目标库IP地址) + +源或目标库数据库 IP 地址,默认继承 -H 选项值。 + +### -p, --db-port (源或目标库端口) + +源或目标库数据库端口,默认继承 -P 选项值。 + +### -n, --db-name (源或目标库名称) + +源或目标库数据库名称,默认继承 -N 选项值。 + +在 SQL 分析任务中,指定目标库名称,通常是使用 MTK 或其他数据迁移工具,进行数据库结构和数据迁移时,在 MogDB 端创建的目标数据库。 + +需要注意的是,资料库用户默认需要对目标库有完全的操作权限,默认资料库是管理员权限。 + +* 在兼容评估中,只需要目标库有对象结构即可 
+* 在SQL模拟中,需要目标库既有对象结构,也有真实对等的全量生产数据,否则性能模拟的结果不具有参考意义 + +### -u, --db-user (源或目标库用户) + +源或目标库数据库登录用户,默认继承 -U 选项值。 + +### -e, --db-password (源或目标库密码) + +源或目标库数据库登录密码,默认继承 -E 选项值。 + +### --target-type (目标库类型) + +指定分析任务总,目标数据库类型,默认为 MOGDB。 + +当前此参数并未完善,仅支持 MOGDB/POSTGRESQL。 + +## 数据采集选项 + +### -q, --sql-days (SQL采集天数) + +指定 SQL 相关数据的采集总天数,由于从会话缓存 GV$SQLAREA 中采集已执行过的 SQL 数据,存在一定的概率漏采,可通过延长采集天数,减小漏采的概率。 + +默认会采集一周 7 天的 SQL 数据。 + +### -Q, --sql-interval (SQL采集间隔) + +指定 SQL 相关数据的采集间隔(单位:秒),默认每 10 分钟采集一次。 + +每次采集到的 SQL 数据会和已采集的数据进行对比去重,避免重复数据太多导致的数据文件过大。 + +### -s, schema-include (Schema白名单) + +指定数据采集的 Schema 白名单,即只采集白名单列表中列出的 Schema 相关数据。 + +### -S, schema-exclude (Schema黑名单) + +指定数据采集的 Schema 黑名单,即不采集黑名单列表中列出的 Schema 相关数据。 + +默认会将 Oracle 系统用户列入 Schema 黑名单中。 + +### -m, enable-monitor (资源监控) + +是否在后台启用资源监控子进程,默认启用。 + +资源监控子进程会定期(间隔 3 秒)查询当前服务器的 CPU 使用率,以及数据目录所在文件系统的剩余空间。 + +当 CPU 使用率高于 90%,或者文件系统剩余空间低于 100MB 时,监控子进程会触发信号,停止采集主进程,避免因为资源问题导致服务器故障。 + +### --slow-log (慢查询日志) + +采集时,指定 MySQL 慢查询日志文件。 + +在采集程序无法访问目标 MySQL 数据库,以及不在 MySQL 服务器上运行采集程序时,可手动将目标库 MySQL 慢日志取出来,使用当前选项指定慢日志即可解析并生成对应的采集数据,可用于后续 MySQL SQL 兼容性评估任务。 diff --git a/product/zh/docs-sca/v5.1/db2_to_mogdb.md b/product/zh/docs-sca/v5.1/db2_to_mogdb.md new file mode 100644 index 00000000..485a1046 --- /dev/null +++ b/product/zh/docs-sca/v5.1/db2_to_mogdb.md @@ -0,0 +1,72 @@ +--- +title: DB2到MogDB评估 +summary: DB2到MogDB评估 +author: Hong Rihua +date: 2022-03-07 +--- + +# DB2 到 MogDB 评估 + +## MogDB 环境准备 + +需要自行准备 MogDB/openGauss 数据库环境,用于资料库和目标库,资料库和目标库默认使用同一套环境。 + +## 对象结构迁移 + +- **结构迁移**: 需要在准备好的 MogDB/openGauss 环境中,创建目标库,并将源库中的数据结构迁移到新创建的目标库中。 +- **数据迁移**: 如果想要进行 SQL 性能对比评估,那么除了迁移数据库结构之外,还需要迁移表数据,确保源 DB2 与目标 MogDB/openGauss 库数据规模一致,性能对比才有意义。 + +相关工具: + +- **MTK**: 可使用 MTK 工具完成结构和数据迁移,具体详情请参考: [https://mogdb.io/mtk](https://mogdb.io/mtk) + +## DB2 数据采集 + +目前 DB2 的数据采集是从缓存中持续采集 SQL,默认情况下,采集过程会定期(每隔10分钟)进行一次采集,共采集一周的缓存 SQL 数据。 + +### 所需权限 + +采集用户所需权限如下: + +> dbadm + +### 相关命令 + +```shell +./sca_linux_x86_64 -T DC -h -p -n 
-u -e + +# 命令选项说明: +# -h/p/t/u/e 指定连接的源端 DB2 库的连接方式 +# 考虑到共享缓存中 SQL 内容不一定全面,采集会持续性增量采集,默认采集一周的数据。如果只是做功能性验证,可使用 -q 0.001 -Q 60 选项,即只进行一次 SQL 数据采集。 +``` + +### 采集结果 + +采集完成后,会生成一个 zip 数据包,该数据包可以拷贝到目标库,并解压成数据目录。 + +后续兼容性分析,则依赖此数据包中的数据,且无需再次连接到源 DB2 数据库。 + +## 资料库初始化 + +```shell +# 使用具有管理员权限的 MogDB/openGauss 用户,进行资料库初始化 +# 资料库初始化会默认创建名称为 sca_db 的资料库 +# 可通过 -N 指定资料库名称,-U 指定资料库用户名,-E 指定资料库用户密码 + +./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password +``` + +## 执行分析任务 + +### 兼容性分析 + +DB2 数据库目前只支持 SQL 兼容性分析,命令如下: + +```shell +# 若资料库名称,用户名,密码非默认,则需要使用 -N, -U, -E 选项指定 +# 若目标库信息与资料库不同,则需要使用 -h, -p, -n, -u, -e 选项指定 + +./sca_linux_x86_64 -T DI -H -P -n -d +``` + +分析完成后,会在 -d 指定的目录中生成 report,可将 report 离线下载后进行查看。 diff --git a/product/zh/docs-sca/v5.1/mysql_to_mogdb.md b/product/zh/docs-sca/v5.1/mysql_to_mogdb.md new file mode 100644 index 00000000..e5b1a582 --- /dev/null +++ b/product/zh/docs-sca/v5.1/mysql_to_mogdb.md @@ -0,0 +1,86 @@ +--- +title: MySQL到MogDB评估 +summary: MySQL到MogDB评估 +author: Hong Rihua +date: 2022-03-07 +--- + +# MySQL 到 MogDB 评估 + +## MogDB 环境准备 + +需要自行准备 MogDB/openGauss 数据库环境,用于资料库和目标库,资料库和目标库默认使用同一套环境。 + +## 对象结构迁移 + +- **结构迁移**: 需要在准备好的 MogDB/openGauss 环境中,创建目标库,并将源库中的数据结构迁移到新创建的目标库中。 +- **数据迁移**: 如果想要进行 SQL 性能对比评估,那么除了迁移数据库结构之外,还需要迁移表数据,确保源 MySQL 与目标 MogDB/openGauss 库数据规模一致,性能对比才有意义。 + +相关工具: + +- **MTK**: 可使用 MTK 工具完成结构和数据迁移,具体详情请参考: [https://mogdb.io/mtk](https://mogdb.io/mtk) + +## MySQL 数据采集 + +目前 MySQL 源库 SQL 的采集依赖于慢日志(可记录到日志文件,或者记录到数据库表中),一次性采集慢日志中记录的所有 SQL。 + +为了避免 SQL 遗漏,必须确保整个业务周期均已测试执行或已覆盖。 + +### 所需权限 + +采集用户所需权限如下: + +> root + +### 开启慢 SQL 日志 + +- 需要提前开启慢日志,确保慢日志中记录完整的业务周期数据,建议提前一周开启慢日志 +- 慢 SQL 阈值间隔设置为 0 + +```sql +SET GLOBAL slow_query_log=ON; +SET GLOBAL long_query_time=0.001; +set global log_output='FILE'; -- 也支持 log_output='TABLE', 但内容相对较少 +``` + +### 数据采集 + +建议在慢日志开启后,运行完整业务周期之后,全流程业务运行完成后,再执行数据采集命令。 + +```shell +./sca_linux_x86_64 -T MC -h -p -n -u -e + +# 命令选项说明: +# -h/p/t/u/e 指定连接的源端 MySQL 库的连接方式 
+``` + +### 采集结果 + +采集完成后,会生成一个 zip 数据包,该数据包可以拷贝到目标库,并解压成数据目录。 + +后续兼容性分析,则依赖此数据包中的数据,且无需再次连接到源 MySQL 数据库。 + +## 资料库初始化 + +```shell +# 使用具有管理员权限的 MogDB/openGauss 用户,进行资料库初始化 +# 资料库初始化会默认创建名称为 sca_db 的资料库 +# 可通过 -N 指定资料库名称,-U 指定资料库用户名,-E 指定资料库用户密码 + +./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password +``` + +## 执行分析任务 + +### 兼容性分析 + +MySQL 数据库目前只支持 SQL 兼容性分析,命令如下: + +```shell +# 若资料库名称,用户名,密码非默认,则需要使用 -N, -U, -E 选项指定 +# 若目标库信息与资料库不同,则需要使用 -h, -p, -n, -u, -e 选项指定 + +./sca_linux_x86_64 -T OI -H -P -n -d +``` + +分析完成后,会在 -d 指定的目录中生成 report,可将 report 离线下载后进行查看。 diff --git a/product/zh/docs-sca/v5.1/oracle_to_mogdb.md b/product/zh/docs-sca/v5.1/oracle_to_mogdb.md new file mode 100644 index 00000000..b7171992 --- /dev/null +++ b/product/zh/docs-sca/v5.1/oracle_to_mogdb.md @@ -0,0 +1,105 @@ +--- +title: Oracle到MogDB评估 +summary: Oracle到MogDB评估 +author: Hong Rihua +date: 2022-03-07 +--- + +# Oracle 到 MogDB 评估 + +## MogDB 环境准备 + +需要自行准备 MogDB/openGauss 数据库环境,用于资料库和目标库,资料库和目标库默认使用同一套环境。 + +## 对象结构迁移 + +- **结构迁移**: 需要在准备好的 MogDB/openGauss 环境中,创建目标库,并将源库中的数据结构迁移到新创建的目标库中。 +- **数据迁移**: 如果想要进行 SQL 性能对比评估,那么除了迁移数据库结构之外,还需要迁移表数据,确保源 Oracle 与目标 MogDB/openGauss 库数据规模一致,性能对比才有意义。 + +相关工具: + +- **MTK**: 可使用 MTK 工具完成结构和数据迁移,具体详情请参考: [https://mogdb.io/mtk](https://mogdb.io/mtk) + +## Oracle 数据采集 + +目前 Oracle 的数据采集是从缓存中持续采集 SQL,默认情况下,采集过程会定期(每隔10分钟)进行一次采集,共采集一周的缓存 SQL 数据。 + +### 所需权限 + +采集用户所需权限如下: + +> SELECT ANY DICTIONARY +> +> CREATE PROCEDURE +> +> EXECUTE ON DBMS_LOB +> +> SELECT ON GV_$SQL_PLAN + +### 相关命令 + +```shell +./sca_linux_x86_64 -T OC -s SCOTT -h -p -n -u -e + +# 命令选项说明: +# -s 指定需要采集的 Schema 列表 +# -h/p/t/u/e 指定连接的源端 Oracle 库的连接方式 +# 考虑到共享缓存中 SQL 内容不一定全面,采集会持续性增量采集,默认采集一周的数据。如果只是做功能性验证,可使用 -q 0.001 -Q 60 选项,即只进行一次 SQL 数据采集。 +``` + +### 采集结果 + +采集完成后,会生成一个 zip 数据包,该数据包可以拷贝到目标库,并解压成数据目录。 + +后续兼容性分析,则依赖此数据包中的数据,且无需再次连接到源 Oracle 数据库。 + +## 资料库初始化 + +```shell +# 使用具有管理员权限的 MogDB/openGauss 用户,进行资料库初始化 +# 资料库初始化会默认创建名称为 sca_db 的资料库 +# 可通过 -N 
指定资料库名称,-U 指定资料库用户名,-E 指定资料库用户密码 + +./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password +``` + +## 执行分析任务 + +### 兼容性分析 + +若只做 SQL 兼容性分析,则使用如下命令: + +```shell +# 若资料库名称,用户名,密码非默认,则需要使用 -N, -U, -E 选项指定 +# 若目标库信息与资料库不同,则需要使用 -h, -p, -n, -u, -e 选项指定 + +./sca_linux_x86_64 -T OI -H -P -n -d +``` + +分析完成后,会在 -d 指定的目录中生成 report,可将 report 离线下载后进行查看。 + +### 性能对比分析 + +若只做 SQL 性能对比分析,则使用如下命令: + +```shell +# 若资料库名称,用户名,密码非默认,则需要使用 -N, -U, -E 选项指定 +# 若目标库信息与资料库不同,则需要使用 -h, -p, -n, -u, -e 选项指定 + +./sca_linux_x86_64 -T OS -H -P -n -d +``` + +分析完成后,会在 -d 指定的目录中生成 report,可将 report 离线下载后进行查看。 + +### 兼容性分析 + 性能对比分析 + +若需要同时做 SQL 兼容性分析和 SQL 性能对比,则使用如下命令: + +```shell +# 若资料库名称,用户名,密码非默认,则需要使用 -N, -U, -E 选项指定 +# 若目标库信息与资料库不同,则需要使用 -h, -p, -n, -u, -e 选项指定 + +./sca_linux_x86_64 -T OIS -H -P -n -d +``` + +分析完成后,会在 -d 指定的目录中生成 report,可将 report 离线下载后进行查看。 diff --git a/product/zh/docs-sca/v5.1/overview.md b/product/zh/docs-sca/v5.1/overview.md new file mode 100644 index 00000000..b43fd678 --- /dev/null +++ b/product/zh/docs-sca/v5.1/overview.md @@ -0,0 +1,59 @@ +--- +title: SCA 介绍 +summary: SCA 介绍 +author: Hong Rihua +date: 2022-03-07 +--- + +# SCA 介绍 + +SCA 全称 SQL Compatible Analysis,是一款异构数据库迁移前的 SQL 兼容和性能评估工具。 + +可用于异构数据迁移前的兼容性评估,评估源数据库中的实际业务 SQL 在目标库中是否存在语法问题,以及评估两款异构数据库中的 SQL 实际执行的性能差异。 + +本工具目前支持三种源端数据库,包括: Oracle, MySQL, DB2。 + +**适用于**: MogDB(以及其他基于 openGauss 的数据库) + +SCA 程序可运行如下 10 种类型的任务: + +1. `I [Initialize]`: **初始化**,用于初始化 SCA 资料库 +2. `L [Apply License]`: **申请 License**,用于 License 申请的任务 +3. `OC [Oracle Collection]`: **Oracle采集**,用于采集 Oracle 数据库中执行过的业务 SQL,需要长时间采集 +4. `MC [MySQL Collection]`: **MySQL采集**,用于采集 MySQL 数据库中执行过的业务 SQL,需提前配置慢日志,然后一次性采集 +5. `DC [DB2 Collection]`: **DB2采集**,用于采集 DB2 数据库中执行过的业务 SQL,需要长时间采集 +6. `OI [Oracle Inspection]`: **Oracle兼容评估**,用于评估源端 Oracle 中采集到的业务 SQL,在目标端 MogDB 中的实际兼容性 +7. `MI [MySQL Inspection]`: **MySQL兼容评估**,用于评估源端 MySQL 中采集到的业务 SQL,在目标端 MogDB 中的实际兼容性 +8. 
`DI [DB2 Inspection]`: **DB2兼容评估**,用于评估源端 DB2 中采集到的业务 SQL,在目标端 MogDB 中的实际兼容性 +9. `OS [Oracle Simulation]`: **Oracle性能评估**,用于评估源端 Oracle 中采集到的业务 SQL,在目标端 MogDB 中的执行性能 +10. `OIS [Oracle Inspection & Simulation]`: **Oracle兼容和性能评估**,等同于 OI + OS 两种任务同时进行 + +注意: + +- 兼容评估,需要有完整的源库结构,推荐使用 MTK 工具进行源数据库结构迁移,测试用途下也可使用 SCA 自动创建测试目标库结构 +- SQL模拟,需要有完整的源库数据和数据,推荐使用 MTK 工具进行源数据库结构以及数据迁移 + +## 操作系统与平台支持 + +SCA 目前支持在如下操作系统和平台架构下运行: + +1. Linux x86_64 +2. Linux arm64 (ARM 平台由于没有 DB2 客户端,故不支持 DB2 数据采集) +3. MacOS (MacOS 版本不支持 openGauss/MogDB 中的 SHA256 加密认证方式) + +## 数据库支持 + +SCA 目前支持的源端与目标端数据库类型如下: + +1. 源端: + - Oracle: 不低于 10.2 + - MySQL: 不低于 5.5 + - DB2: 不低于 11.5 + - PostgreSQL: 不低于 9 + - File: MySQL slow-log + +2. 目标端: + - MogDB/openGauss: 不低于 2.0 + - PostgreSQL: 不低于 13.0 + +**注意**: 平台支持,与数据库支持没有相关性,数据库可以运行在其他平台,如 Windows/AIX 等,只要从数据库到 SCA 运行主机网络与端口互通即可。 diff --git a/product/zh/docs-sca/v5.1/pg_to_mogdb.md b/product/zh/docs-sca/v5.1/pg_to_mogdb.md new file mode 100644 index 00000000..16ce40cc --- /dev/null +++ b/product/zh/docs-sca/v5.1/pg_to_mogdb.md @@ -0,0 +1,73 @@ +--- +title: PostgreSQL到MogDB评估 +summary: PostgreSQL到MogDB评估 +author: Hong Rihua +date: 2022-04-19 +--- + +# PostgreSQL 到 MogDB 评估 + +## MogDB 环境准备 + +需要自行准备 MogDB/openGauss 数据库环境,用于资料库和目标库,资料库和目标库默认使用同一套环境。 + +## 对象结构迁移 + +- **结构迁移**: 需要在准备好的 MogDB/openGauss 环境中,创建目标库,并将源库中的数据结构迁移到新创建的目标库中。 +- **数据迁移**: 如果想要进行 SQL 性能对比评估,那么除了迁移数据库结构之外,还需要迁移表数据,确保源 PostgreSQL 与目标 MogDB/openGauss 库数据规模一致,性能对比才有意义。 + +相关工具: + +- **MTK**: 可使用 MTK 工具完成结构和数据迁移,具体详情请参考: [https://mogdb.io/mtk](https://mogdb.io/mtk) + +## PostgreSQL 数据采集 + +目前 PostgreSQL 源库 SQL 的采集依赖于插件 pg_stat_statements 插件,一次性采集插件表中记录的所有 SQL。 + +为了避免 SQL 遗漏,建议调整插件中保留的 SQL 最大数量,且确保整个业务周期均已测试执行或已覆盖。 + +### 所需权限 + +采集用户所需权限如下: + +> superuser (用于查询数据字典和 pg_stat_statements 插件表) + +### 相关命令 + +```shell +./sca_linux_x86_64 -T PC -h -p -n -u -e + +# 命令选项说明: +# -h/p/t/u/e 指定连接的源端 PostgreSQL 库的连接方式 +``` + +### 采集结果 + +采集完成后,会生成一个 zip 数据包,该数据包可以拷贝到目标库,并解压成数据目录。 + 
+后续兼容性分析,则依赖此数据包中的数据,且无需再次连接到源 PostgreSQL 数据库。 + +## 资料库初始化 + +```shell +# 使用具有管理员权限的 MogDB/openGauss 用户,进行资料库初始化 +# 资料库初始化会默认创建名称为 sca_db 的资料库 +# 可通过 -N 指定资料库名称,-U 指定资料库用户名,-E 指定资料库用户密码 + +./sca_linux_x86_64 -T i -H -P -N sca_db -U sca_repo -E 'SCA@password' --user --password +``` + +## 执行分析任务 + +### 兼容性分析 + +PostgreSQL 数据库目前只支持 SQL 兼容性分析,命令如下: + +```shell +# 若资料库名称,用户名,密码非默认,则需要使用 -N, -U, -E 选项指定 +# 若目标库信息与资料库不同,则需要使用 -h, -p, -n, -u, -e 选项指定 + +./sca_linux_x86_64 -T PI -H -P -n -d +``` + +分析完成后,会在 -d 指定的目录中生成 report,可将 report 离线下载后进行查看。 diff --git a/product/zh/docs-sca/v5.1/release-notes.md b/product/zh/docs-sca/v5.1/release-notes.md new file mode 100644 index 00000000..964e4c43 --- /dev/null +++ b/product/zh/docs-sca/v5.1/release-notes.md @@ -0,0 +1,87 @@ +--- +title: 发布记录 +summary: 发布记录 +author: Hong Rihua +date: 2022-03-07 +--- + +# 发布记录 + +## v5.1.0 + +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.1.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.1.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.1.0/sca_macos_x86_64) +- [示例报告](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/SCA_sample_report_v5.0.zip) + +### Features + +- 增加源数据库支持: PostgreSQL,其 SQL 数据来源于 pg_stat_statements 插件采集 +- 增加对象信息采集: 所有源库均会自动采集字段信息与 PLSQL 参数信息,并在 SQL 兼容性审核中自动创建使用 + +### Bugfixs + +- 修复部分情况下,进度条输出会因队列问题导致异常的报错问题 +- 修复 MySQL 采集过程中 performance_schema 未能识别为 SYSTEM_CATALOG 的问题 +- 修复 MySQL 对象信息采集时,没有过滤系统内置对象的问题 +- 修复替换 openGauss 驱动后,-D 选项由于类型绑定问题导致的报错 +- 修复 License 验证时,未能验证 License 失效时间的问题 +- 修复慢日志文件中,SQL 文本中含有 # 开头的行,导致解析报错的问题 +- 修复 SQL 兼容度汇总页面中,对于源库为 Oracle 时,表格数据排序不正常的问题 +- 修复因为 JS 问题导致的部分浏览器下离线报告无法展示的问题 +- 修复 MySQL 慢日志采集时,解析不到 ID 数据导致的异常退出 +- 修复 --debug 选项不识别,需要指定值的问题 +- 修复部分场景下 Oracle 数据采集字符集报错的问题 ORA-29275 (字符集转换 + 拼接空字符串) +- 修复 Oracle 为源库的报告中,由于 GaussDB 资料库没有 median 函数,导致的复杂度数据查询报错 + +### Improvements + +- 连接目标库之后统一设置 search_path 包含所有 schema,降低 SQL 兼容度分析进程中找不到表的概率 +- 增加 SQL 改写规则,针对 Oracle 数据库增加了部分 SQL 
改写规则 +- csv 格式的详细 SQL 列表调整,使用中文表头和部分中文内容,增加支持类别字段,便于阅读和理解 +- 调整 SQL 兼容度汇总页面数据逻辑,NULL 相关改写不算改写支持 +- 调整 MySQL 数据获取逻辑,严格按照 log_output 中 FILE/TABLE 的设定获取数据 +- 驱动及配套代码调整,对于 PostgreSQL 库使用 psycopg2 驱动,对于 openGauss 库使用 py_opengauss 驱动 +- 移除部分冗余的 format_size 函数及其调用 + +## v5.0.0 + +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/sca_macos_x86_64) +- [示例报告](https://cdn-mogdb.enmotech.com/sca/SCA_v5.0.0/SCA_sample_report_v5.0.zip) + +### Feature + +- 代码逻辑重构,命令选项和使用方式有较大改变 +- 支持 MySQL 连库采集,通过数据库表中的慢日志信息采集,或者通过服务器端本地慢日志采集 +- 支持 DB2 数据的连库采集,采集方式与 Oracle 类似,需要长期运行,默认采集一周 +- 支持 DB2 SQL 兼容性分析,前提是需要提前准备好目标 MogDB 数据库 +- 支持 Oracle SQL 详情列表文件的导出 (sql_detail_list.csv) +- 调整 Oracle 数据采集逻辑,移除不必要的对象信息与系统配置信息 +- 调整资料库结构,只保留源库基础信息,SQL兼容审核,SQL性能对比等相关的结构 + +## v4.1.0 + +- [SCA使用说明_v4.1.0.pdf](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/SCA使用说明_v4.1.0.pdf) +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.1.0/sca_macos_x86_64) + +### Feature + +- 支持 MySQL 数据库基于 slow-log 和 general-log 的 SQL 兼容度分析 +- 支持自定义 SQL 转换规则 + +## v4.0.0 + +- [SCA使用说明_v4.0.0.pdf](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/SCA使用说明_v4.0.0.pdf) +- [sca_linux_arm64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/sca_linux_arm64) +- [sca_linux_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/sca_linux_x86_64) +- [sca_macos_x86_64](https://cdn-mogdb.enmotech.com/sca/SCA_v4.0.0/sca_macos_x86_64) + +### Feature + +- 集成全新设计的离线分析报告 +- 支持单个可执行程序的数据采集,兼容性分析与性能评估 +- 支持 Linux 系统下的 openGauss/MogDB 数据库的 sha256 密码认证方式 diff --git a/product/zh/docs-sca/v5.1/result.md b/product/zh/docs-sca/v5.1/result.md new file mode 100644 index 00000000..afe8f432 --- /dev/null +++ 
b/product/zh/docs-sca/v5.1/result.md @@ -0,0 +1,173 @@ +--- +title: SCA 结果说明 +summary: SCA 结果说明 +author: Hong Rihua +date: 2022-03-07 +--- + +# SCA 结果说明 + +SCA 结果分为两类: + +- **采集结果** : 源库(Oracle)中的数据采集结果,该结果会自动打包为一个 zip 文件,并在采集最后给出明确文件位置提示。 +- **分析结果** : 在目标 MogDB/openGauss 中执行完分析后生成的分析报告,报告为单独一个文件夹,其中为离线 HTML 格式的报告文档,可任意进行拷贝传阅。 + +## 采集结果 + +数据采集结果会自动打包成 zip 数据包,默认存储在程序当前目录下。 + +采集完成之后的结果提示信息如下: + +``` +2022-02-15 19:20:40.301126 INFO [runMe.py:356] +==================== [ Summary Information ] ====================+ +2022-02-15 19:20:40.301184 INFO [runMe.py:357] | Task Name File Name File Size | +2022-02-15 19:20:40.301222 INFO [runMe.py:358] | --------------------- ------------------------------ ---------- | +2022-02-15 19:20:40.301260 INFO [runMe.py:360] | SCA_SESSION_SQL sca_sql_information.dat 3.65 KB | +2022-02-15 19:20:40.301294 INFO [runMe.py:360] | SCA_SESSION_SQL_PERF sca_sql_performance.dat 3.29 KB | +2022-02-15 19:20:40.301326 INFO [runMe.py:360] | SCA_MYSQL_USER_HOST sca_mysql_user_host.dat 1815 B | +2022-02-15 19:20:40.301357 INFO [runMe.py:360] | SCA_DATABASE sca_database.dat 163 B | +2022-02-15 19:20:40.301387 INFO [runMe.py:361] +=================================================================+ + + >>> Final Result is: + >>> ---------------------------------------------- + >>> /Users/hongyedba/Desktop/SCA_MySQL_test.zip +``` + +## 分析报告 + +兼容性分析与 SQL 性能模拟均会生成对应的分析报告,分析报告默认位于指定的数据目录中,也可以使用 `-r` 选项指定报告的输出目录。 + +### 报告入口 + +分析报告目录中,`index.html` 为报告的入口,点击该文件,使用默认浏览器(推荐使用Chrome)打开,即可查看分析报告。 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-1.png) + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-2.png) + +### SQL 兼容度汇总 + +SQL 兼容度汇总页面展示本次兼容度分析的相关结果数据,页面表格中按照用户名,程序名,模块名汇总,展示系统中采集到的所有 SQL,以及这些 SQL 在 MogDB 中的支持情况。 + +注意: 不同数据库中,该表格展示的内容也略有差异。 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-3.png) + +### SQL 改写规则 + +SQL 改写规则页面展示本次分析中涉及到的 SQL 改写相关的规则信息。 + +其中使用情况字段展示该条规则的触发情况: + +- Match 为规则在 SQL 中的命中数量 +- Count 为规则匹配的 
SQL 数量 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-4.png) + +### SQL 复杂度分布 + +SQL 复杂度分布页面展示采集到的 SQL 的复杂度分布情况。 +复杂度分布目前的评判标准如下: + +1. SQL 涉及的表的数量,表数量越多,复杂度越高 +2. SQL 中使用 connect by 语法的次数,使用 connect by 的次数越多,越有可能出现执行性能问题,对应 SQL 的复杂度越高 +3. SQL 中使用自定义函数的数量,自定义函数中的逻辑复杂度不明,所以使用自定义函数越多,SQL 复杂度越高 +4. SQL 在 Oracle 中的实际执行函数,执行耗时越高,则认为对应的 SQL 复杂度越高 + +最终每条 SQL 的复杂度则按照以上 4 个评判标准进行汇总。SQL 复杂度越高,在迁移后越需要关注 SQL 的执行性能,避免性能问题导致的业务故障。 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-5.png) + +### SQL 性能对比(性能对比汇总) + +SQL 性能对比汇总页面展示两个信息: + +1. 性能对比的基础信息,性能对比的一些基础配置,以及对比时使用到的相关阈值设置 +2. SQL 性能汇总,按照总体,提升,下降,不支持,超时等维度对 SQL 进行汇总,分析各类 SQL 对整体负载的影响 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-6.png) + +### SQL 性能对比(Top by Workload/SQL, Timeout) + +SQL 性能对比中 Top by Workload, Top by SQL, Timeout 等页面内容格式较为相似,以 Top by Workload 为例进行说明。 + +该列表展示影响最大的 100 条 SQL,其中 SQL FMS 字段为超链接,点击可进一步查看对应 SQL 的分析详情。 + +SQL 的性能影响有两个评估维度: + +1. SQL 影响: 当前 SQL 在单条 SQL 执行情况下的性能变化影响的比例 +2. 负载影响: 当前 SQL 参考其总执行次数,综合评估其对整个 SQL 负载的性能变化,以及对整体 SQL 性能的影响 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-7.png) + +### SQL 性能对比(SQL 详情) + +SQL详情页面展示如下几个方面的内容: + +1. SQL 执行信息: SQL 在 Oracle 以及 MogDB 中的执行信息,其中 Oracle 中的执行信息来源于动态性能视图,MogDB 中的执行信息来源于实际 SQL 执行。 +2. SQL 文本: SQL 在 Oracle 中的执行文本,以及在 MogDB 中的实际执行文本。 +3. SQL 绑定变量: SQL 在 Oracle 中的绑定变量信息,该绑定变量信息会应用到 MogDB 中执行的 SQL 文本内,使其能在 MogDB 中真实的模拟业务执行。 +4. Oracle 执行计划: SQL 在 Oracle 中的执行计划,该执行计划来源于动态性能视图。 +5. MogDB 执行计划: SQL 在 MogDB 中的执行计划,该执行计划来源于实际执行,程序会自动对 MogDB 中的执行计划进行初步分析,标识出其中潜在的性能问题点。 +6. MogDB 对象信息: SQL 中涉及到的对象在 MogDB 中的相关结构,以及统计信息。 + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-8.png) + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-9.png) + +![img](https://cdn-mogdb.enmotech.com/docs-media/sca/result-10.png) + +### 详细 SQL 支持列表 + +在报告的根目录中,`sql_detail_list.csv` 文件中记录了本次 SQL 兼容度评估涉及到的所有 SQL 的支持情况,以及部分 SQL 在评估过程中的自动化改写策略和改写后的 SQL 文本。 + +文件中的字段内容主要根据数据库的不同,略有差异。 + +#### Oracle + +1. 
用户名: 执行 SQL 的 Schema 信息,在 Oracle 中一般对应于用户 +2. SQL类型: SQL 类型,区分采集到的 SQL 是系统 SQL 还是用户的业务 SQL,取值为: USER, SYSTEM_CATALOG, SYSTEM_COMMAND +3. 应用模块: 执行 SQL 语句的客户端 module +4. 应用操作: 执行 SQL 语句的客户端 action +5. MogDB异常代码: MogDB 中的 SQL 执行错误码 +6. MogDB异常信息: MogDB 中的 SQL 执行报错信息 +7. 支持类别: SQL 在 MogDB 中的支持类别:直接支持,改写支持,不支持 +8. 原始SQL: 原始 SQL 文本 +9. 转换规则: SQL 满足的自动化改写规则 +10. 改写SQL: 经过自动化改写后的 SQL 文本 + +#### MySQL + +1. 库名(schema): 执行 SQL 的 Schema 信息,在 MySQL 中一般对应于数据库 +2. SQL类型: SQL 类型,区分采集到的 SQL 是系统 SQL 还是用户的业务 SQL,取值为: USER, SYSTEM_CATALOG, SYSTEM_COMMAND +3. 用户主机: 执行 SQL 的 MySQL 客户端用户和对应的执行客户端主机信息 +4. MySQL错误代码: SQL 在 MySQL 中的执行情况,如果是 0 则表明 SQL 执行成功,非 0 表示在 MySQL 中执行报错 +5. MogDB异常代码: MogDB 中的 SQL 执行错误码 +6. MogDB异常信息: MogDB 中的 SQL 执行报错信息 +7. 支持类别: SQL 在 MogDB 中的支持类别:直接支持,改写支持,不支持 +8. 原始SQL: 原始 SQL 文本 +9. 转换规则: SQL 满足的自动化改写规则 +10. 改写SQL: 经过自动化改写后的 SQL 文本 + +#### DB2 + +1. Schema: 执行 SQL 的 Schema 信息 +2. SQL类型: SQL 类型,区分采集到的 SQL 是系统 SQL 还是用户的业务 SQL,取值为: USER, SYSTEM_CATALOG, SYSTEM_COMMAND +3. 语句类型: DB2 中记录的 SQL 语句类型 +4. MogDB异常代码: MogDB 中的 SQL 执行错误码 +5. MogDB异常信息: MogDB 中的 SQL 执行报错信息 +6. 支持类别: SQL 在 MogDB 中的支持类别:直接支持,改写支持,不支持 +7. 原始SQL: 原始 SQL 文本 +8. 转换规则: SQL 满足的自动化改写规则 +9. 改写SQL: 经过自动化改写后的 SQL 文本 + +#### PostgreSQL + +1. Schema: 执行 SQL 的 Schema 信息 +2. SQL类型: SQL 类型,区分采集到的 SQL 是系统 SQL 还是用户的业务 SQL,取值为: USER, SYSTEM_CATALOG, SYSTEM_COMMAND +3. MogDB异常代码: MogDB 中的 SQL 执行错误码 +4. MogDB异常信息: MogDB 中的 SQL 执行报错信息 +5. 支持类别: SQL 在 MogDB 中的支持类别:直接支持,改写支持,不支持 +6. 原始SQL: 原始 SQL 文本 +7. 转换规则: SQL 满足的自动化改写规则 +8. 
改写SQL: 经过自动化改写后的 SQL 文本 diff --git a/product/zh/docs-sca/v5.1/toc.md b/product/zh/docs-sca/v5.1/toc.md new file mode 100644 index 00000000..b8752118 --- /dev/null +++ b/product/zh/docs-sca/v5.1/toc.md @@ -0,0 +1,16 @@ + + +# 文档 + +## SCA 文档目录 + ++ [简介](/overview.md) ++ [使用说明](/usage.md) ++ [命令选项](/command_options.md) ++ [结果说明](/result.md) ++ [下载&发布](/release-notes.md) ++ 使用示例 + + [Oracle到MogDB评估](/oracle_to_mogdb.md) + + [MySQL到MogDB评估](/mysql_to_mogdb.md) + + [DB2到MogDB评估](/db2_to_mogdb.md) + + [PostgreSQL到MogDB评估](/pg_to_mogdb.md) diff --git a/product/zh/docs-sca/v5.1/usage.md b/product/zh/docs-sca/v5.1/usage.md new file mode 100644 index 00000000..12c956a7 --- /dev/null +++ b/product/zh/docs-sca/v5.1/usage.md @@ -0,0 +1,70 @@ +--- +title: SCA 使用说明 +summary: SCA 使用说明 +author: Hong Rihua +date: 2022-03-07 +--- + +# SCA 使用说明 + +## 部署步骤 + +程序已打包为二进制可执行文件,无需额外的部署操作。 + +程序文件上传后,确认能正常通过网络连接数据库即可直接使用。 + +## 常用命令 + +注意:以下以 x86-64 架构下的 Linux 系统为例进行说明,其他 ARM 平台或 MacOS 系统需要修改对应的命令名称。 + +* 申请 License (软件第一次运行的时候,需要先联机申请 License) + +```shell +# 运行过程中,需要输入用户邮箱 +# 申请到的 License 数据会发送到输入的邮箱中。 +# 将 License 数据拷贝出来,并写入 SCA 同目录中的 license.json 文件内即可 +./sca_linux_x86_64 -T L +``` + +* 初始化资料库 (对于同一个目标MogDB数据库,只在第一次运行时需要初始化资料库) + +```shell +./sca_macos_x86_64 -T i -H -P -N -U -E --user --password +``` + +* Oracle 数据采集 (无需资料库) + +```shell +# 作为测试用途,可添加如下参数加快 SQL 采集: -q 0.001 -Q 60 -m off +./sca_linux_x86_64 -T OC -s SCOTT -h -p -n -u -e +``` + +* 从指定 MySQL 慢日志采集 + +```shell +./sca_linux_x86_64 -T MC -d --slow-log= +``` + +* 从指定 MySQL 服务器进行自动采集 + +```shell +./sca_linux_x86_64 -T MC -d -h -p -n -u -e +``` + +* 进行 Oracle 兼容评估 (资料库已初始化,且资料库登录信息均为默认值) + +```shell +./sca_linux_x86_64 -T OI -d -n +``` + +* 只进行 SQL 模拟 (资料库已初始化,且资料库用户密码信息均为默认值) + +```shell +./sca_linux_x86_64 -T OS -h -p -n -d +``` + +* 同时进行SQL兼容度与性能评估 (资料库已初始化) + +```shell +./sca_linux_x86_64 -T OIS -h -p -n -u -e -d +``` diff --git a/src/utils/config.js b/src/utils/config.js index 568443ae..940dd065 100644 --- 
a/src/utils/config.js +++ b/src/utils/config.js @@ -1,8 +1,8 @@ /** * 工具设置 */ -const allToolsMenu = [ - { + const allToolsMenu = [ + { zh: '管理工具', en: 'Management Tools', key: 'tools', @@ -21,8 +21,8 @@ const allToolsMenu = [ disabled: true } ] - }, - { + }, + { zh: 'Migrate to MogDB解决方案', en: 'Migrate to MogDB Solution', key: 'o2o', @@ -54,16 +54,16 @@ const allToolsMenu = [ disabled: false } ] - } -] + } + ] /** * Version * @description 版本设定,便于设置可上线的文档版本 * value:真实目录名,用于URL, label:用于展示, disabled:是否可用 */ -const allProductVersions = { - mogdb: { + const allProductVersions = { + mogdb: { 'v2.1': { value: 'v2.1', label: 'v2.1', @@ -89,8 +89,8 @@ const allProductVersions = { label: 'v1.1.0', disabled: false } - }, - mogha: { + }, + mogha: { 'v2.3': { value: 'v2.3', label: 'v2.3', @@ -101,17 +101,22 @@ const allProductVersions = { label: 'v2.0', disabled: false } - }, - manager: null, - mtk: { + }, + manager: null, + mtk: { 'v2.0': { value: 'v2.0', label: 'stable', disabled: false } - }, - mdb: null, - sca: { + }, + mdb: null, + sca: { + 'v5.1': { + value: 'v5.1', + label: 'v5.1', + disabled: false + }, 'v5.0': { value: 'v5.0', label: 'v5.0', @@ -127,8 +132,8 @@ const allProductVersions = { label: 'v4.0', disabled: false } - }, - mvd: { + }, + mvd: { 'v2.4': { value: 'v2.4', label: 'v2.4', @@ -139,15 +144,15 @@ const allProductVersions = { label: 'v2.0', disabled: false } - } -} -// 获取所有有效的产品,排除未上线产品 -const allProductKeys = Object.keys(allProductVersions).filter(p => allProductVersions[p]) -// 获取所有有效的产品的最新版本 -const allProductCurrentVersion = {} -allProductKeys.forEach(p => { - allProductCurrentVersion[p] = Object.keys(allProductVersions[p]).filter(v => !allProductVersions[p][v].disabled)[0] -}) + } + } + // 获取所有有效的产品,排除未上线产品 + const allProductKeys = Object.keys(allProductVersions).filter(p => allProductVersions[p]) + // 获取所有有效的产品的最新版本 + const allProductCurrentVersion = {} + allProductKeys.forEach(p => { + allProductCurrentVersion[p] = 
Object.keys(allProductVersions[p]).filter(v => !allProductVersions[p][v].disabled)[0] + }) // 产品对应的key与label const allProductLabels = { -- Gitee