diff --git a/.gitignore b/.gitignore index b64ad0460cec03eb7eba934931ade8b5882fed36..6ce27b3dbb51a3328bed0e93d3240ecc2c8c2dbc 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,4 @@ Thumbs.db CMakeFiles cmake_install.cmake *_autogen +.worktrees/* diff --git a/CMakeLists.txt b/CMakeLists.txt index f929bc6d9217a81e73cf4bb9f4d6f8012a74e8b5..94aef609527040f8d695283843584dead078545a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,32 +41,26 @@ find_package(QT NAMES Qt6 Qt5 COMPONENTS Core Gui Widgets REQUIRED) find_package(Qt${QT_VERSION_MAJOR} COMPONENTS Core Gui Widgets REQUIRED) -set(QUAZIP_LIB "") set(POPPLER_LIB "") if(QT_VERSION_MAJOR EQUAL 5) - set(QUAZIP_LIB quazip) set(POPPLER_LIB poppler-qt5) elseif (QT_VERSION_MAJOR EQUAL 6) - set(QUAZIP_LIB quazip1-qt6) set(POPPLER_LIB poppler-qt6) endif () find_package(PkgConfig REQUIRED) -set(UKUI_FILE_METADATA_EXTERNAL_LIBS "") -set(UKUI_FILE_METADATA_PC_PKGS uchardet libavcodec libavformat libavutil libswscale taglib ${POPPLER_LIB} ${QUAZIP_LIB}) -foreach(PC_LIB IN ITEMS ${UKUI_FILE_METADATA_PC_PKGS}) - string(TOUPPER "${PC_LIB}" UPPER_PC_LIB) - pkg_check_modules(${UPPER_PC_LIB} REQUIRED IMPORTED_TARGET ${PC_LIB}) - if(${${UPPER_PC_LIB}_FOUND}) - include_directories(${${UPPER_PC_LIB}_INCLUDE_DIRS}) - link_directories(${${UPPER_PC_LIB}_LIBRARY_DIRS}) - list(APPEND UKUI_FILE_METADATA_LIBS PkgConfig::${PC_LIB}) - endif() +set(UKUI_FILE_METADATA_PC_PKGS uchardet libavcodec libavformat libavutil libswscale taglib minizip tesseract lept) +foreach(PC_LIB IN LISTS UKUI_FILE_METADATA_PC_PKGS) + string(TOUPPER "${PC_LIB}" PC_PREFIX) + # Normalize libav* package names to the shorter imported-target prefixes + # used elsewhere in the tree, e.g. libavcodec -> AVCODEC. 
+ string(REGEX REPLACE "^LIB" "" PC_PREFIX "${PC_PREFIX}") + pkg_check_modules(${PC_PREFIX} REQUIRED IMPORTED_TARGET ${PC_LIB}) endforeach() - -string(TOUPPER "${QUAZIP_LIB}" QUAZIP_NAME) -string(TOUPPER "${POPPLER_LIB}" POPPLER_NAME) +# poppler-qt5 / poppler-qt6 do not fit the generic prefix normalization above, +# so keep a stable POPPLER imported-target prefix for consumers. +pkg_check_modules(POPPLER REQUIRED IMPORTED_TARGET ${POPPLER_LIB}) enable_testing() add_subdirectory(src) @@ -76,4 +70,3 @@ add_subdirectory(tests) endif() feature_summary(WHAT ALL INCLUDE_QUIET_PACKAGES FATAL_ON_MISSING_REQUIRED_PACKAGES) - diff --git a/autotests/CMakeLists.txt b/autotests/CMakeLists.txt index 31aee624f281d0e3d26dbb6fd8858cf4aa05ec67..4cd073330c37be93aa25ad6cfd9e24551d6d2277 100644 --- a/autotests/CMakeLists.txt +++ b/autotests/CMakeLists.txt @@ -14,15 +14,14 @@ include_directories(../src) add_executable(ffmpegExtractorTest ffmpeg-extractortest.cpp ../src/extractors/ffmpeg-extractor.cpp) -target_include_directories(ffmpegExtractorTest SYSTEM PRIVATE ${LIBAVCODEC_INCLUDE_DIRS} ${LIBAVFORMAT_INCLUDE_DIRS} ${LIBAVUTIL_INCLUDE_DIRS} ${SWSCALE_INCLUDE_DIRS}) target_link_libraries(ffmpegExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${LIBAVCODEC_LIBRARIES} - ${LIBAVFORMAT_LIBRARIES} - ${LIBAVUTIL_LIBRARIES} - ${LIBSWSCALE_LIBRARIES} + PkgConfig::AVCODEC + PkgConfig::AVFORMAT + PkgConfig::AVUTIL + PkgConfig::SWSCALE ) add_test(ffmpegExtractorTest ${CMAKE_BINARY_DIR}/autotests/ffmpegExtractorTest) @@ -52,9 +51,8 @@ target_link_libraries(Office2007ExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${${QUAZIP_NAME}_LIBRARIES} ) -add_test(Office2007Extractortest ${CMAKE_BINARY_DIR}/autotests/OfficeExtractorTest) +add_test(Office2007Extractortest ${CMAKE_BINARY_DIR}/autotests/Office2007ExtractorTest) # # pdf test @@ -63,12 +61,11 @@ add_test(Office2007Extractortest 
${CMAKE_BINARY_DIR}/autotests/OfficeExtractorTe add_executable(PdfExtractorTest ../src/extractors/pdf-extractor.cpp pdf-extractortest.cpp) -target_include_directories(PdfExtractorTest SYSTEM PRIVATE ${POPPLER_INCLUDE_DIRS}) target_link_libraries(PdfExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${${POPPLER_NAME}_LIBRARIES} + PkgConfig::POPPLER ) add_test(PdfExtractortest ${CMAKE_BINARY_DIR}/autotests/PdfExtractorTest) @@ -79,12 +76,11 @@ add_test(PdfExtractortest ${CMAKE_BINARY_DIR}/autotests/PdfExtractorTest) add_executable(TextExtractorTest ../src/extractors/text-extractor.cpp text-extractortest.cpp) -target_include_directories(TextExtractorTest SYSTEM PRIVATE ${UCHARDET_INCLUDE_DIRS}) target_link_libraries(TextExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${UCHARDET_LIBRARIES} + PkgConfig::UCHARDET ) add_test(TextExtractortest ${CMAKE_BINARY_DIR}/autotests/TextExtractorTest) @@ -99,7 +95,6 @@ target_link_libraries(OfdExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${${QUAZIP_NAME}_LIBRARIES} ) add_test(OfdExtractorTest ${CMAKE_BINARY_DIR}/autotests/OfdExtractorTest) @@ -114,7 +109,6 @@ target_link_libraries(UofExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${${QUAZIP_NAME}_LIBRARIES} ) add_test(UofExtractorTest ${CMAKE_BINARY_DIR}/autotests/UofExtractorTest) @@ -129,8 +123,8 @@ target_link_libraries(PngExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - tesseract - leptonica + PkgConfig::TESSERACT + PkgConfig::LEPT ) add_test(PngExtractortest ${CMAKE_BINARY_DIR}/autotests/PngExtractorTest) @@ -141,12 +135,11 @@ add_test(PngExtractortest ${CMAKE_BINARY_DIR}/autotests/PngExtractorTest) add_executable(TaglibExtractorTest ../src/extractors/taglib-extractor.cpp taglib-extractortest.cpp) 
-target_include_directories(TaglibExtractorTest SYSTEM PRIVATE ${TAGLIB_INCLUDE_DIRS}) target_link_libraries(TaglibExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${TAGLIB_LIBRARIES} + PkgConfig::TAGLIB ) add_test(TaglibExtractorTest ${CMAKE_BINARY_DIR}/autotests/TaglibExtractorTest) @@ -157,12 +150,10 @@ add_test(TaglibExtractorTest ${CMAKE_BINARY_DIR}/autotests/TaglibExtractorTest) add_executable(ImageExtractorTest ../src/extractors/image-extractor.cpp image-extractortest.cpp) -target_include_directories(ImageExtractorTest SYSTEM PRIVATE ${IMAGE_INCLUDE_DIRS}) target_link_libraries(ImageExtractorTest PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Test ukui-file-metadata - ${IMAGE_LIBRARIES} ) add_test(ImageExtractorTest ${CMAKE_BINARY_DIR}/autotests/ImageExtractorTest) @@ -192,3 +183,16 @@ target_link_libraries(BookMarkTest PUBLIC ukui-file-metadata ) add_test(BookMarkTest ${CMAKE_BINARY_DIR}/autotests/BookMarkTest) + +# +# zip reader test +# + +add_executable(ZipReaderTest + zip-readertest.cpp) +target_link_libraries(ZipReaderTest PUBLIC + Qt${QT_VERSION_MAJOR}::Core + Qt${QT_VERSION_MAJOR}::Test + ukui-file-metadata +) +add_test(ZipReaderTest ${CMAKE_BINARY_DIR}/autotests/ZipReaderTest) diff --git a/autotests/samplefiles/test_zipreader_cp437_default_names.zip b/autotests/samplefiles/test_zipreader_cp437_default_names.zip new file mode 100644 index 0000000000000000000000000000000000000000..a9de9607a73c22d38f3e54704d3db4c9d13f05ce Binary files /dev/null and b/autotests/samplefiles/test_zipreader_cp437_default_names.zip differ diff --git a/autotests/samplefiles/test_zipreader_gbk_names.zip b/autotests/samplefiles/test_zipreader_gbk_names.zip new file mode 100644 index 0000000000000000000000000000000000000000..b2d9bdf2af8bc21ef70137078ce839636a37c62e Binary files /dev/null and b/autotests/samplefiles/test_zipreader_gbk_names.zip differ diff --git 
a/autotests/samplefiles/test_zipreader_unicode_path_extra.zip b/autotests/samplefiles/test_zipreader_unicode_path_extra.zip new file mode 100644 index 0000000000000000000000000000000000000000..4b8e3373043e70521a4d4c26326eab99d0b1e58d Binary files /dev/null and b/autotests/samplefiles/test_zipreader_unicode_path_extra.zip differ diff --git a/autotests/samplefiles/test_zipreader_unicode_path_extra_bad_crc.zip b/autotests/samplefiles/test_zipreader_unicode_path_extra_bad_crc.zip new file mode 100644 index 0000000000000000000000000000000000000000..424d428984b6e3b50c012b398e011474d2186f72 Binary files /dev/null and b/autotests/samplefiles/test_zipreader_unicode_path_extra_bad_crc.zip differ diff --git a/autotests/samplefiles/test_zipreader_utf8_names.zip b/autotests/samplefiles/test_zipreader_utf8_names.zip new file mode 100644 index 0000000000000000000000000000000000000000..cf4e16d561918c5c738e8f3008ff73d878052991 Binary files /dev/null and b/autotests/samplefiles/test_zipreader_utf8_names.zip differ diff --git a/autotests/zip-readertest.cpp b/autotests/zip-readertest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eb17c4bedae0f7f273076361356003ce2b607d0e --- /dev/null +++ b/autotests/zip-readertest.cpp @@ -0,0 +1,179 @@ +/* + * + * Copyright (C) 2026, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ * + */ + +#include "zip-readertest.h" +#include "indexerextractortestsconfig.h" +#include "zip-reader.h" + +#include +#include +#include + +using namespace UkuiFileMetadata; + +static_assert(!std::is_copy_constructible<ZipReader>::value, + "ZipReader must not be copy constructible"); +static_assert(!std::is_copy_assignable<ZipReader>::value, + "ZipReader must not be copy assignable"); + +namespace { + +QString testFilePath(const QString &baseName, const QString &extension) +{ + return QLatin1String(INDEXER_TESTS_SAMPLE_FILES_PATH) + + QLatin1Char('/') + + baseName + + QLatin1Char('.') + + extension; +} + +class ScopedEnvironmentValue +{ +public: + ScopedEnvironmentValue(const char *name, const QByteArray &value) + : variableName(name) + , hadValue(qEnvironmentVariableIsSet(name)) + , previousValue(qgetenv(name)) + { + qputenv(variableName.constData(), value); + } + + ~ScopedEnvironmentValue() + { + if (hadValue) { + qputenv(variableName.constData(), previousValue); + } else { + qunsetenv(variableName.constData()); + } + } + +private: + QByteArray variableName; + bool hadValue = false; + QByteArray previousValue; +}; + +} + +void ZipReaderTest::testReadEntryReturnsFullContent() +{ + ZipReader reader(testFilePath(QStringLiteral("test_libreoffice"), QStringLiteral("docx"))); + QVERIFY(reader.open()); + + QByteArray data; + QVERIFY(reader.readEntry(QStringLiteral("word/document.xml"), &data, Qt::CaseSensitive)); + QVERIFY(!data.isEmpty()); + QVERIFY(data.contains("KFileMetaData")); +} + +void ZipReaderTest::testProcessEntryCanStopEarly() +{ + ZipReader reader(testFilePath(QStringLiteral("test_libreoffice"), QStringLiteral("docx"))); + QVERIFY(reader.open()); + + QByteArray prefix; + const bool ok = reader.processEntry(QStringLiteral("word/document.xml"), + [&prefix](QIODevice *device) { + prefix = device->read(32); + return prefix.size() == 32; + }, + Qt::CaseSensitive); + + QVERIFY(ok); + QCOMPARE(prefix.size(), 32); +} + +void ZipReaderTest::testCp437EntryNamesUseZipDefaultCodec() +{ +
const QString expectedEntryName = QStringLiteral("ü@.txt"); + ZipReader reader(testFilePath(QStringLiteral("test_zipreader_cp437_default_names"), QStringLiteral("zip"))); + QVERIFY(reader.open()); + + QCOMPARE(reader.entryNames(), QStringList({expectedEntryName})); + + QByteArray data; + QVERIFY(reader.readEntry(expectedEntryName, &data)); + QCOMPARE(data, QByteArray("cp437-default-content")); +} + +void ZipReaderTest::testDefaultFallbackCodecsDoNotDependOnLocaleLanguage() +{ + ScopedEnvironmentValue lcAll("LC_ALL", QByteArrayLiteral("C.UTF-8")); + ScopedEnvironmentValue lang("LANG", QByteArrayLiteral("zh_CN.UTF-8")); + + ZipReader reader(testFilePath(QStringLiteral("test_zipreader_cp437_default_names"), QStringLiteral("zip"))); + QCOMPARE(reader.fallbackFileNameCodecs(), QList<QByteArray>({QByteArrayLiteral("CP437")})); +} + +void ZipReaderTest::testUnicodePathExtraFieldTakesPrecedence() +{ + const QString expectedEntryName = QStringLiteral("目录/测试.txt"); + ZipReader reader(testFilePath(QStringLiteral("test_zipreader_unicode_path_extra"), QStringLiteral("zip"))); + reader.setFallbackFileNameCodecs({QByteArrayLiteral("CP437")}); + QVERIFY(reader.open()); + + QCOMPARE(reader.entryNames(), QStringList({expectedEntryName})); + + QByteArray data; + QVERIFY(reader.readEntry(expectedEntryName, &data)); + QCOMPARE(data, QByteArray("unicode-extra-content")); +} + +void ZipReaderTest::testInvalidUnicodePathExtraFieldFallsBackToConfiguredCodec() +{ + const QString expectedEntryName = QStringLiteral("目录/测试.txt"); + ZipReader reader(testFilePath(QStringLiteral("test_zipreader_unicode_path_extra_bad_crc"), QStringLiteral("zip"))); + reader.setFallbackFileNameCodecs({QByteArrayLiteral("GB18030")}); + QVERIFY(reader.open()); + + QCOMPARE(reader.entryNames(), QStringList({expectedEntryName})); + + QByteArray data; + QVERIFY(reader.readEntry(expectedEntryName, &data)); + QCOMPARE(data, QByteArray("unicode-extra-bad-crc-content")); +} + +void ZipReaderTest::testUtf8EntryNamesRoundTrip() +{ +
const QString expectedEntryName = QStringLiteral("目录/测试.txt"); + ZipReader reader(testFilePath(QStringLiteral("test_zipreader_utf8_names"), QStringLiteral("zip"))); + QVERIFY(reader.open()); + + QCOMPARE(reader.entryNames(), QStringList({expectedEntryName})); + + QByteArray data; + QVERIFY(reader.readEntry(expectedEntryName, &data)); + QCOMPARE(data, QByteArray("utf8-content")); +} + +void ZipReaderTest::testLegacyEntryNamesUseFallbackCodecs() +{ + const QString expectedEntryName = QStringLiteral("目录/测试.txt"); + ZipReader reader(testFilePath(QStringLiteral("test_zipreader_gbk_names"), QStringLiteral("zip"))); + reader.setFallbackFileNameCodecs({QByteArrayLiteral("GB18030")}); + QVERIFY(reader.open()); + + QCOMPARE(reader.entryNames(), QStringList({expectedEntryName})); + + QByteArray data; + QVERIFY(reader.readEntry(expectedEntryName, &data)); + QCOMPARE(data, QByteArray("gbk-content")); +} + +QTEST_GUILESS_MAIN(ZipReaderTest) diff --git a/autotests/zip-readertest.h b/autotests/zip-readertest.h new file mode 100644 index 0000000000000000000000000000000000000000..16ea0bbc61fa201087d95fbeaee7e19b2fa1b53d --- /dev/null +++ b/autotests/zip-readertest.h @@ -0,0 +1,44 @@ +/* + * + * Copyright (C) 2026, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ * + */ + +#ifndef UKUI_FILE_METADATA_ZIPREADERTEST_H +#define UKUI_FILE_METADATA_ZIPREADERTEST_H + +#include <QObject> + +namespace UkuiFileMetadata { + +class ZipReaderTest : public QObject +{ + Q_OBJECT + +private Q_SLOTS: + void testReadEntryReturnsFullContent(); + void testProcessEntryCanStopEarly(); + void testCp437EntryNamesUseZipDefaultCodec(); + void testDefaultFallbackCodecsDoNotDependOnLocaleLanguage(); + void testUnicodePathExtraFieldTakesPrecedence(); + void testInvalidUnicodePathExtraFieldFallsBackToConfiguredCodec(); + void testUtf8EntryNamesRoundTrip(); + void testLegacyEntryNamesUseFallbackCodecs(); +}; + +} + +#endif // UKUI_FILE_METADATA_ZIPREADERTEST_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8c42732d7d576cdcd4be9d89e6b12e7573e1e59..a27f7a68692c167480a84aab7f4e142febadf190 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,6 +33,7 @@ set(HEADERS property-info.h thumbnail.h ocr-utils.h + zip-reader.h bookmark.h bookmarks-manager.h ) @@ -54,6 +55,8 @@ set(ukui-file-metadata_SRCS thumbnail-utils.h ocr-utils.cpp ocr-utils.h + zip-reader.cpp + zip-reader.h bookmark.h bookmarks-manager.cpp bookmarks-manager.h) @@ -62,14 +65,24 @@ add_library(ukui-file-metadata SHARED ${ukui-file-metadata_SRCS} ) -target_link_libraries(ukui-file-metadata PUBLIC Qt${QT_VERSION_MAJOR}::Core Qt${QT_VERSION_MAJOR}::Gui Qt${QT_VERSION_MAJOR}::Xml Qt${QT_VERSION_MAJOR}::Widgets tesseract) +target_link_libraries(ukui-file-metadata PUBLIC + Qt${QT_VERSION_MAJOR}::Core + Qt${QT_VERSION_MAJOR}::Gui + Qt${QT_VERSION_MAJOR}::Xml + Qt${QT_VERSION_MAJOR}::Widgets + PRIVATE + PkgConfig::TESSERACT + PkgConfig::LEPT + PkgConfig::MINIZIP) include(CMakePackageConfigHelpers) set(CMAKE_CONFIG_INSTALL_DIR "/usr/share/cmake/ukui-file-metadata") set(HEADERS_INSTALL_DIR /usr/include/ukui-file-metadata) set(PC_INSTALL_DIR "/usr/lib/pkgconfig") -target_include_directories(ukui-file-metadata PUBLIC $) +target_include_directories(ukui-file-metadata PUBLIC + $ + $)
configure_package_config_file( ${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig/ukui-file-metadata.pc.in ${CMAKE_CURRENT_BINARY_DIR}/ukui-file-metadata.pc @@ -91,6 +104,9 @@ set_target_properties(ukui-file-metadata PROPERTIES OUTPUT_NAME ukui-file-metadata ) +export(TARGETS ukui-file-metadata + FILE "${CMAKE_CURRENT_BINARY_DIR}/ukui-file-metadata-targets.cmake") + if(COMMAND qt_create_translation) qt_create_translation(QM_FILES ${CMAKE_SOURCE_DIR} ${TS_FILES}) else() diff --git a/src/extractors/CMakeLists.txt b/src/extractors/CMakeLists.txt index 5e13d3cda415beae230cbfbbe7c32380e697024e..124cffd588a05cb2c764365e8a571f876ae48d0c 100644 --- a/src/extractors/CMakeLists.txt +++ b/src/extractors/CMakeLists.txt @@ -1,23 +1,19 @@ include_directories(../) -if(AVCODEC_FOUND AND AVFORMAT_FOUND AND AVUTIL_FOUND AND SWSCALE_FOUND) - add_library(ukuifilemetadata_ffmpegextractor MODULE - ffmpeg-extractor.cpp - ) - target_include_directories(ukuifilemetadata_ffmpegextractor SYSTEM PRIVATE ${AVCODEC_INCLUDE_DIRS} ${AVFORMAT_INCLUDE_DIRS} ${AVUTIL_INCLUDE_DIRS} ${SWSCALE_INCLUDE_DIRS}) - target_link_libraries(ukuifilemetadata_ffmpegextractor - ukui-file-metadata - ${LIBAVCODEC_LIBRARIES} - ${LIBAVFORMAT_LIBRARIES} - ${LIBAVUTIL_LIBRARIES} - ${LIBSWSCALE_LIBRARIES} - ) - set_target_properties(ukuifilemetadata_ffmpegextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") - install( - TARGETS ukuifilemetadata_ffmpegextractor - DESTINATION "${PLUGIN_INSTALL_DIR}") - -endif() +add_library(ukuifilemetadata_ffmpegextractor MODULE + ffmpeg-extractor.cpp + ) +target_link_libraries(ukuifilemetadata_ffmpegextractor + ukui-file-metadata + PkgConfig::AVCODEC + PkgConfig::AVFORMAT + PkgConfig::AVUTIL + PkgConfig::SWSCALE + ) +set_target_properties(ukuifilemetadata_ffmpegextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") +install( + TARGETS ukuifilemetadata_ffmpegextractor + DESTINATION "${PLUGIN_INSTALL_DIR}") # #office (binary) @@ 
-37,8 +33,7 @@ install( add_library(ukuifilemetadata_office2007extractor MODULE office2007-extractor.cpp) target_link_libraries(ukuifilemetadata_office2007extractor - ukui-file-metadata - ${${QUAZIP_NAME}_LIBRARIES}) + ukui-file-metadata) set_target_properties(ukuifilemetadata_office2007extractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") install( TARGETS ukuifilemetadata_office2007extractor @@ -51,7 +46,7 @@ install( add_library(ukuifilemetadata_textextractor MODULE text-extractor.cpp) target_link_libraries(ukuifilemetadata_textextractor ukui-file-metadata - ${UCHARDET_LIBRARIES} + PkgConfig::UCHARDET ) set_target_properties(ukuifilemetadata_textextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") install( @@ -65,7 +60,7 @@ install( add_library(ukuifilemetadata_pdfextractor MODULE pdf-extractor.cpp) target_link_libraries(ukuifilemetadata_pdfextractor ukui-file-metadata - ${${POPPLER_NAME}_LIBRARIES} + PkgConfig::POPPLER ) set_target_properties(ukuifilemetadata_pdfextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") install( @@ -78,8 +73,7 @@ install( add_library(ukuifilemetadata_uofextractor MODULE uof-extractor.cpp) target_link_libraries(ukuifilemetadata_uofextractor - ukui-file-metadata - ${${QUAZIP_NAME}_LIBRARIES}) + ukui-file-metadata) set_target_properties(ukuifilemetadata_uofextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") install( TARGETS ukuifilemetadata_uofextractor @@ -91,8 +85,7 @@ install( add_library(ukuifilemetadata_ofdextractor MODULE ofd-extractor.cpp) target_link_libraries(ukuifilemetadata_ofdextractor - ukui-file-metadata - ${${QUAZIP_NAME}_LIBRARIES}) + ukui-file-metadata) set_target_properties(ukuifilemetadata_ofdextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") install( TARGETS ukuifilemetadata_ofdextractor @@ -117,7 +110,7 @@ install( 
add_library(ukuifilemetadata_taglibextractor MODULE taglib-extractor.cpp) target_link_libraries( ukuifilemetadata_taglibextractor ukui-file-metadata - ${TAGLIB_LIBRARIES} + PkgConfig::TAGLIB ) set_target_properties(ukuifilemetadata_taglibextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ukuifilemetadata") diff --git a/src/extractors/ofd-extractor.cpp b/src/extractors/ofd-extractor.cpp index 0f396cf16a20f1917ecc0254b988e823f8a35e0f..a976c6e351b87ece2103bf06985344eeefbdda23 100644 --- a/src/extractors/ofd-extractor.cpp +++ b/src/extractors/ofd-extractor.cpp @@ -20,15 +20,11 @@ */ #include "ofd-extractor.h" -#if QT_VERSION < QT_VERSION_CHECK(6,0,0) -#include "quazip5/quazip.h" -#include "quazip5/quazipfile.h" -#else -#include "quazip.h" -#include "quazipfile.h" -#endif +#include "zip-reader.h" +#include #include #include +#include #include #include @@ -59,107 +55,106 @@ void OfdExtractor::extract(ExtractionResult *result) { return; } - QuaZip zipfile(result->inputUrl()); - if (!zipfile.open(QuaZip::mdUnzip)) { + ZipReader archive(result->inputUrl()); + if (!archive.open()) { return; } result->addType(Type::Document); - - QXmlStreamReader reader; if (result->inputFlags() & ExtractionResult::Flag::ExtractMetaData) { - if (zipfile.setCurrentFile("OFD.xml")) { - QuaZipFile fileR(&zipfile); - if (fileR.open(QIODevice::ReadOnly)) { - reader.setDevice(&fileR); - QStringList keywords; - while (!reader.atEnd()) { - if (reader.readNextStartElement()) { - if (reader.name().toString() == "Title") { - result->add(Property::Title, reader.readElementText()); - continue; - } - if (reader.name().toString() == "Author") { - result->add(Property::Author, reader.readElementText()); - continue; - } - if (reader.name().toString() == "Subject") { - result->add(Property::Subject, reader.readElementText()); - continue; - } - if (reader.name().toString() == "Abstract") { - result->add(Property::Description, reader.readElementText()); - continue; - } - if 
(reader.name().toString() == "CreationDate") { - result->add(Property::CreationDate, reader.readElementText()); - continue; - } - if (reader.name().toString() == "Creator") { - result->add(Property::Generator, reader.readElementText()); - continue; - } - - if (reader.name().toString() == "Keyword") { - keywords.append(reader.readElementText()); - } + QByteArray xmlData; + if (archive.readEntry(QStringLiteral("OFD.xml"), &xmlData)) { + QXmlStreamReader reader(xmlData); + QStringList keywords; + while (!reader.atEnd()) { + if (reader.readNextStartElement()) { + if (reader.name().toString() == "Title") { + result->add(Property::Title, reader.readElementText()); + continue; + } + if (reader.name().toString() == "Author") { + result->add(Property::Author, reader.readElementText()); + continue; + } + if (reader.name().toString() == "Subject") { + result->add(Property::Subject, reader.readElementText()); + continue; + } + if (reader.name().toString() == "Abstract") { + result->add(Property::Description, reader.readElementText()); + continue; + } + if (reader.name().toString() == "CreationDate") { + result->add(Property::CreationDate, reader.readElementText()); + continue; + } + if (reader.name().toString() == "Creator") { + result->add(Property::Generator, reader.readElementText()); + continue; } - } - fileR.close(); - if (!keywords.isEmpty()) { - result->add(Property::Keywords, keywords); + if (reader.name().toString() == "Keyword") { + keywords.append(reader.readElementText()); + } } } + + if (!keywords.isEmpty()) { + result->add(Property::Keywords, keywords); + } } } if (!(result->inputFlags() & ExtractionResult::Flag::ExtractPlainText)) { return; } - // GB/T 33190-2016规范定义可以存在多个Doc_x目录,暂时只取第一个目录的内容 - QString prefix("Doc_0/Pages/"); - QStringList fileList; - for (const auto &file: zipfile.getFileNameList()) { - if (file.startsWith(prefix)) { - fileList << file; + + // GB/T 33190-2016允许多个 Doc_x 目录,这里保持现有行为,仅处理 Doc_0。 + const QRegularExpression 
pagePattern(QStringLiteral("^Doc_0/Pages/Page_(\\d+)/Content\\.xml$")); + QStringList pageEntries; + for (const auto &file : archive.entryNames()) { + if (pagePattern.match(file).hasMatch()) { + pageEntries << file; } } + std::sort(pageEntries.begin(), pageEntries.end(), [&pagePattern](const QString &left, const QString &right) { + const auto leftMatch = pagePattern.match(left); + const auto rightMatch = pagePattern.match(right); + return leftMatch.captured(1).toInt() < rightMatch.captured(1).toInt(); + }); QString textContent; - for (int i = 0; i < fileList.count(); ++i) { - QString filename = prefix + "Page_" + QString::number(i) + "/Content.xml"; - if (!zipfile.setCurrentFile(filename)) { - continue; - } - QuaZipFile fileR(&zipfile); - if (!fileR.open(QIODevice::ReadOnly)) { + for (const QString &filename : pageEntries) { + bool limitReached = false; + if (!archive.processEntry(filename, + [&textContent, &limitReached](QIODevice *device) { + QXmlStreamReader reader(device); + while (!reader.atEnd() && !reader.hasError()) { + if (!reader.readNextStartElement()) { + if (reader.hasError()) { + qWarning() << reader.errorString() << reader.error() << "line:" << reader.lineNumber() << "column:" << reader.columnNumber(); + break; + } + } + + if (reader.name().toString() == "TextCode") { + textContent.append(reader.readElementText()); + if (textContent.length() >= MAX_CONTENT_LENGTH / 3) { + limitReached = true; + break; + } + } + } + return true; + })) { continue; } - reader.setDevice(&fileR); - - while (!reader.atEnd() && !reader.hasError()) { - if (!reader.readNextStartElement()) { - if (reader.hasError()) { - qWarning() << reader.errorString() << reader.error() << "line:" << reader.lineNumber() << "column:" << reader.columnNumber(); - break; - } - } - - if (reader.name().toString() == "TextCode") { - textContent.append(reader.readElementText()); - if (textContent.length() >= MAX_CONTENT_LENGTH / 3) { - fileR.close(); - zipfile.close(); - result->append(textContent); 
- return; - } - } + if (limitReached) { + result->append(textContent); + return; } - fileR.close(); } - zipfile.close(); result->append(textContent); } diff --git a/src/extractors/office2007-extractor.cpp b/src/extractors/office2007-extractor.cpp index 8b6dd8b95a31cfc5048c3edfa7a2399f62ac8d0f..9623903ef544abb3be9f15b957d917f997db7764 100644 --- a/src/extractors/office2007-extractor.cpp +++ b/src/extractors/office2007-extractor.cpp @@ -20,24 +20,37 @@ */ #include "office2007-extractor.h" #include "thumbnail-utils.h" +#include "zip-reader.h" #include -#if QT_VERSION < QT_VERSION_CHECK(6,0,0) -#include "quazip5/quazip.h" -#include "quazip5/quazipfile.h" -#else -#include "quazip.h" -#include "quazipfile.h" -#include -#endif -#include #include +#include +#include using namespace UkuiFileMetadata; static const QString VERSION = "1.0"; static const QString PLUGIN_NAME = "Office2007"; +namespace { + +bool appendOfficeXmlText(ZipReader &archive, const QString &entryName, ExtractionResult *result) +{ + return archive.processEntry(entryName, + [result](QIODevice *device) { + QXmlStreamReader reader(device); + while (!reader.atEnd()) { + if (reader.readNextStartElement() && reader.name().toString() == QLatin1String("t")) { + result->append(reader.readElementText()); + } + } + return true; + }, + Qt::CaseSensitive); +} + +} + const QStringList supportedMimeTypes = { // Ambiguous legacy Office MIME aliases can still point to OOXML content // when the filename keeps an old suffix such as .doc/.xls/.ppt. 
@@ -76,21 +89,18 @@ void Office2007Extractor::extract(ExtractionResult *result) return; } - QuaZip file(result->inputUrl()); - if (!file.open(QuaZip::mdUnzip)) { + ZipReader archive(result->inputUrl()); + if (!archive.open()) { return; } - QuaZipFile fileR(&file); - QXmlStreamReader reader(&fileR); - if (result->inputFlags() & ExtractionResult::ExtractThumbnail) { QString thumbnailPath; - bool needExtract = ThumbnailUtils::needGenerateThumbnail(result, PLUGIN_NAME, VERSION) && - file.setCurrentFile(QStringLiteral("_rels/.rels"), QuaZip::csSensitive) && - fileR.open(QIODevice::ReadOnly); - + QByteArray relsData; + const bool needExtract = ThumbnailUtils::needGenerateThumbnail(result, PLUGIN_NAME, VERSION) && + archive.readEntry(QStringLiteral("_rels/.rels"), &relsData, Qt::CaseSensitive); if (needExtract) { + QXmlStreamReader reader(relsData); while (!reader.atEnd()) { if (reader.readNextStartElement() && reader.name().toString() == QLatin1String("Relationship")) { const auto attributes = reader.attributes(); @@ -100,23 +110,22 @@ void Office2007Extractor::extract(ExtractionResult *result) } } } - fileR.close(); } if (!thumbnailPath.isEmpty()) { - if (file.setCurrentFile(thumbnailPath) && fileR.open(QIODevice::ReadOnly)) { + QByteArray thumbnailData; + if (archive.readEntry(thumbnailPath, &thumbnailData, Qt::CaseSensitive)) { QImage thumbnail; - thumbnail.loadFromData(fileR.readAll()); + thumbnail.loadFromData(thumbnailData); ThumbnailUtils::setThumbnail(result, thumbnail, PLUGIN_NAME, VERSION); - fileR.close(); } } } if (result->inputFlags() & ExtractionResult::ExtractMetaData) { - if (file.setCurrentFile("docProps/core.xml", QuaZip::csSensitive) and fileR.open(QIODevice::ReadOnly)) { - reader.clear(); - reader.setDevice(&fileR); + QByteArray xmlData; + if (archive.readEntry(QStringLiteral("docProps/core.xml"), &xmlData, Qt::CaseSensitive)) { + QXmlStreamReader reader(xmlData); while (!reader.atEnd()) { if (reader.readNextStartElement()) { if 
(reader.name().toString() == "description") { @@ -164,12 +173,10 @@ void Office2007Extractor::extract(ExtractionResult *result) } } } - fileR.close(); } - if (file.setCurrentFile("docProps/app.xml", QuaZip::csSensitive) and fileR.open(QIODevice::ReadOnly)) { - reader.clear(); - reader.setDevice(&fileR); + if (archive.readEntry(QStringLiteral("docProps/app.xml"), &xmlData, Qt::CaseSensitive)) { + QXmlStreamReader reader(xmlData); while (!reader.atEnd()) { if (reader.readNextStartElement()) { if (this->getSupportedMimeType(result->inputMimetype()) == @@ -197,67 +204,36 @@ void Office2007Extractor::extract(ExtractionResult *result) } } } - fileR.close(); } } //extract document content if (result->inputFlags() & ExtractionResult::ExtractPlainText) { //word - if (file.setCurrentFile("word/document.xml", QuaZip::csSensitive) and fileR.open(QIODevice::ReadOnly)) { - reader.clear(); - reader.setDevice(&fileR); + if (appendOfficeXmlText(archive, QStringLiteral("word/document.xml"), result)) { result->addType(Type::Document); - while (!reader.atEnd()) { - if (reader.readNextStartElement() and reader.name().toString() == "t") { - result->append(reader.readElementText()); - } - } - fileR.close(); //excel - } else if (file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive) and fileR.open(QIODevice::ReadOnly)) { - reader.clear(); - reader.setDevice(&fileR); + } else if (appendOfficeXmlText(archive, QStringLiteral("xl/sharedStrings.xml"), result)) { result->addType(Type::Document); result->addType(Type::Spreadsheet); - while (!reader.atEnd()) { - if (reader.readNextStartElement() and reader.name().toString() == "t") { - result->append(reader.readElementText()); - } - } - fileR.close(); } else { //powerpoint QStringList slideXmlList; - for (const QString &slideFile : file.getFileNameList()) { -#if QT_VERSION < QT_VERSION_CHECK(6,0,0) - if (slideFile.contains(QRegExp("ppt/slides/slide*"))) { -#else - if (slideFile.contains(QRegularExpression("ppt/slides/slide*"))) { 
-#endif + const QRegularExpression slidePattern(QStringLiteral("^ppt/slides/slide[^/]*$")); + for (const QString &slideFile : archive.entryNames()) { + if (slidePattern.match(slideFile).hasMatch()) { slideXmlList << slideFile; } } if (!slideXmlList.isEmpty()) { result->addType(Type::Document); result->addType(Type::Presentation); - for (QString slideXmlFile : slideXmlList) { - if (file.setCurrentFile(slideXmlFile, QuaZip::csSensitive) and fileR.open(QIODevice::ReadOnly)) { - reader.clear(); - reader.setDevice(&fileR); - while (!reader.atEnd()) { - if(reader.readNextStartElement() and reader.name().toString() == "t"){ - result->append(reader.readElementText()); - } - } - fileR.close(); - } + for (const QString &slideXmlFile : slideXmlList) { + appendOfficeXmlText(archive, slideXmlFile, result); } } } } - - file.close(); } QStringList Office2007Extractor::mimetypes() const diff --git a/src/extractors/uof-extractor.cpp b/src/extractors/uof-extractor.cpp index cb6eabaec5d258ca6106e72b543ff67cbad57bbf..07d5715376c2d2c89beec5e7c69a767be6f5a159 100644 --- a/src/extractors/uof-extractor.cpp +++ b/src/extractors/uof-extractor.cpp @@ -20,13 +20,7 @@ */ #include "uof-extractor.h" -#if QT_VERSION < QT_VERSION_CHECK(6,0,0) -#include "quazip5/quazip.h" -#include "quazip5/quazipfile.h" -#else -#include "quazip.h" -#include "quazipfile.h" -#endif +#include "zip-reader.h" #include #include #include @@ -64,54 +58,50 @@ void UofExtractor::extract(ExtractionResult *result) { } else { //参考标准 GJB/Z 165-2012 https://www.doc88.com/p-9089133923912.html //解析UOF2.0文件的元数据 - QuaZip file(result->inputUrl()); - if (!file.open(QuaZip::mdUnzip)) { + ZipReader archive(result->inputUrl()); + if (!archive.open()) { return; } - QuaZipFile fileR; - QXmlStreamReader reader; //parse meta info - if ((result->inputFlags() & ExtractionResult::Flag::ExtractMetaData) && file.setCurrentFile("_meta/meta.xml")) { - fileR.setZip(&file); - if (fileR.open(QIODevice::ReadOnly)) { - reader.setDevice(&fileR); - while 
(!reader.atEnd()) { - if (reader.readNextStartElement()) { - if (reader.name().toString() == "作者_5204") { - result->add(Property::Author, reader.readElementText()); - continue; - } - if (reader.name().toString() == "标题_5201") { - result->add(Property::Title, reader.readElementText()); - continue; - } - if (reader.name().toString() == "主题_5202") { - result->add(Property::Subject, reader.readElementText()); - continue; - } - if (reader.name().toString() == "摘要_5206") { - result->add(Property::Description, reader.readElementText()); - continue; - } - if (reader.name().toString() == "创建日期_5207") { - result->add(Property::CreationDate, reader.readElementText()); - continue; - } - if (reader.name().toString() == "创建应用程序_520A") { - result->add(Property::Generator, reader.readElementText()); - continue; - } - if (reader.name().toString() == "页数_5215") { - result->add(Property::PageCount, reader.readElementText()); - continue; - } - if (reader.name().toString() == "字数_5216") { - result->add(Property::WordCount, reader.readElementText()); - } + QByteArray xmlData; + if ((result->inputFlags() & ExtractionResult::Flag::ExtractMetaData) + && archive.readEntry(QStringLiteral("_meta/meta.xml"), &xmlData)) { + QXmlStreamReader reader(xmlData); + while (!reader.atEnd()) { + if (reader.readNextStartElement()) { + if (reader.name().toString() == "作者_5204") { + result->add(Property::Author, reader.readElementText()); + continue; + } + if (reader.name().toString() == "标题_5201") { + result->add(Property::Title, reader.readElementText()); + continue; + } + if (reader.name().toString() == "主题_5202") { + result->add(Property::Subject, reader.readElementText()); + continue; + } + if (reader.name().toString() == "摘要_5206") { + result->add(Property::Description, reader.readElementText()); + continue; + } + if (reader.name().toString() == "创建日期_5207") { + result->add(Property::CreationDate, reader.readElementText()); + continue; + } + if (reader.name().toString() == "创建应用程序_520A") { + 
result->add(Property::Generator, reader.readElementText()); + continue; + } + if (reader.name().toString() == "页数_5215") { + result->add(Property::PageCount, reader.readElementText()); + continue; + } + if (reader.name().toString() == "字数_5216") { + result->add(Property::WordCount, reader.readElementText()); } } - fileR.close(); } } @@ -121,29 +111,25 @@ void UofExtractor::extract(ExtractionResult *result) { } if (suffix == "uot" ||suffix == "uos") { - if (file.setCurrentFile("content.xml")) { - fileR.setZip(&file); - if (!fileR.open(QIODevice::ReadOnly)) { - file.close(); - return; - } - reader.setDevice(&fileR); - - QString textContent; - while (!reader.atEnd()) { - if (reader.readNextStartElement() && reader.name().toString() == "文本串_415B") { - textContent.append(reader.readElementText()); - if (textContent.length() >= MAX_CONTENT_LENGTH / 3) { - break; - } - } - } - fileR.close(); - file.close(); + QString textContent; + if (archive.processEntry(QStringLiteral("content.xml"), + [&textContent](QIODevice *device) { + QXmlStreamReader reader(device); + while (!reader.atEnd()) { + if (reader.readNextStartElement() + && reader.name().toString() == "文本串_415B") { + textContent.append(reader.readElementText()); + if (textContent.length() >= MAX_CONTENT_LENGTH / 3) { + break; + } + } + } + return true; + })) { result->append(textContent); } } else if (suffix == "uop") { - parsePptOfUof2(result); + parsePptOfUof2(result, archive); } } @@ -371,37 +357,26 @@ void UofExtractor::parseUofFile(ExtractionResult *result) { file.close(); } -bool loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName) +bool loadZipFileToDoc(ZipReader &zipReader, QDomDocument &doc, const QString &fileName) { - if (!zipFile.isOpen() && !zipFile.open(QuaZip::mdUnzip)) { - return false; - } - - if (!zipFile.setCurrentFile(fileName)) { - return false; - } - - QuaZipFile file(&zipFile); - if (!file.open(QIODevice::ReadOnly)) { + QByteArray xmlData; + if 
(!zipReader.readEntry(fileName, &xmlData)) { return false; } doc.clear(); - if (!doc.setContent(&file)) { - file.close(); + if (!doc.setContent(xmlData)) { return false; } - file.close(); return true; } //ppt文档的内容存放在graphics.xml中,需要先解析content中的引用再解析graphics内容 -void UofExtractor::parsePptOfUof2(ExtractionResult *result) { - QuaZip zipFile(result->inputUrl()); +void UofExtractor::parsePptOfUof2(ExtractionResult *result, ZipReader &zipReader) { QDomDocument doc; - if (!loadZipFileToDoc(zipFile, doc, "content.xml")) { + if (!loadZipFileToDoc(zipReader, doc, "content.xml")) { return; } @@ -426,7 +401,7 @@ void UofExtractor::parsePptOfUof2(ExtractionResult *result) { return; } - if (!loadZipFileToDoc(zipFile, doc, "graphics.xml")) { + if (!loadZipFileToDoc(zipReader, doc, "graphics.xml")) { return; } @@ -459,4 +434,4 @@ void UofExtractor::parsePptOfUof2(ExtractionResult *result) { } } result->append(textContent); -} \ No newline at end of file +} diff --git a/src/extractors/uof-extractor.h b/src/extractors/uof-extractor.h index c64a9ce1ca033088bfc0e63f4394080bce3fb65e..ffb18960f94f0a7ff20525c493f4cb9a459c006f 100644 --- a/src/extractors/uof-extractor.h +++ b/src/extractors/uof-extractor.h @@ -25,6 +25,9 @@ #include "extractor-plugin.h" namespace UkuiFileMetadata { + +class ZipReader; + class UofExtractor : public ExtractorPlugin { Q_OBJECT @@ -38,7 +41,7 @@ public: QStringList mimetypes() const override; private: void parseUofFile(ExtractionResult *result); - void parsePptOfUof2(ExtractionResult *result); + void parsePptOfUof2(ExtractionResult *result, ZipReader &zipReader); friend class UofExtractorTest; }; diff --git a/src/ukui-file-metadata-config.cmake.in b/src/ukui-file-metadata-config.cmake.in index 7e42a19063e095e552b8c17d348ef66a3d494c30..2fbf32899808bec848b10e8db9427e651407e3f5 100644 --- a/src/ukui-file-metadata-config.cmake.in +++ b/src/ukui-file-metadata-config.cmake.in @@ -1,9 +1,6 @@ @PACKAGE_INIT@ include(CMakeFindDependencyMacro) 
-find_dependency(Qt@QT_VERSION_MAJOR@Core "@REQUIRED_QT_VERSION@") -if(TARGET Qt6::Core) - find_dependency(Qt6Core5Compat @REQUIRED_QT_VERSION@) -endif() +find_dependency(Qt@QT_VERSION_MAJOR@ "@REQUIRED_QT_VERSION@" COMPONENTS Core Gui Xml Widgets) -include("${CMAKE_CURRENT_LIST_DIR}/ukui-file-metadata-targets.cmake") \ No newline at end of file +include("${CMAKE_CURRENT_LIST_DIR}/ukui-file-metadata-targets.cmake") diff --git a/src/zip-reader.cpp b/src/zip-reader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e0a37a9df40cf889ebfa2da9903e0465721eea58 --- /dev/null +++ b/src/zip-reader.cpp @@ -0,0 +1,641 @@ +/* + * + * Copyright (C) 2026, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + */ + +#include "zip-reader.h" + +#include +#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) +#include +#else +#include +#endif +#include +#include +#include + +#include +#include + +namespace UkuiFileMetadata { + +namespace { + +constexpr int kReadBufferSize = 8192; +constexpr uLong kUtf8NameFlag = 1u << 11; +constexpr quint16 kUnicodePathExtraFieldId = 0x7075; +constexpr quint8 kUnicodePathExtraFieldVersion = 1; + +struct EntryRecord +{ + QByteArray rawName; + QString decodedName; + unz64_file_pos position {}; + quint64 uncompressedSize = 0; + uLong flags = 0; +}; + +void appendUniqueCodecName(QList *codecNames, const QByteArray &codecName) +{ + if (codecNames == nullptr) { + return; + } + + const QByteArray trimmedName = codecName.trimmed(); + if (trimmedName.isEmpty()) { + return; + } + + if (!codecNames->contains(trimmedName)) { + codecNames->append(trimmedName); + } +} + +QList normalizeCodecNames(const QList &codecNames) +{ + QList normalizedNames; + for (const QByteArray &codecName : codecNames) { + appendUniqueCodecName(&normalizedNames, codecName); + } + return normalizedNames; +} + +QByteArray systemCodecName() +{ +#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) + const char *codecName = QStringConverter::nameForEncoding(QStringConverter::System); + return codecName ? QByteArray(codecName) : QByteArray(); +#else + const QTextCodec *localeCodec = QTextCodec::codecForLocale(); + return localeCodec ? 
localeCodec->name() : QByteArray(); +#endif +} + +QList defaultFallbackCodecNames() +{ + QList codecNames; + appendUniqueCodecName(&codecNames, QByteArrayLiteral("CP437")); + return codecNames; +} + +quint16 readLittleEndian16(const uchar *data) +{ + return static_cast(data[0]) + | (static_cast(data[1]) << 8); +} + +quint32 readLittleEndian32(const uchar *data) +{ + return static_cast(data[0]) + | (static_cast(data[1]) << 8) + | (static_cast(data[2]) << 16) + | (static_cast(data[3]) << 24); +} + +quint32 rawNameCrc32(const QByteArray &rawName) +{ + const auto *bytes = reinterpret_cast(rawName.constData()); + return crc32(0L, bytes, static_cast(rawName.size())); +} + +bool tryDecodeWithCodec(const QByteArray &rawName, const char *codecName, QString *decodedName) +{ + if (codecName == nullptr || decodedName == nullptr) { + return false; + } + +#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) + QStringDecoder decoder(codecName); + if (!decoder.isValid()) { + return false; + } + + const QString text = decoder(rawName); + if (decoder.hasError()) { + return false; + } + + QStringEncoder encoder(codecName); + if (!encoder.isValid()) { + return false; + } + + const QByteArray roundTrip = encoder(text); + if (roundTrip != rawName) { + return false; + } +#else + QTextCodec *codec = QTextCodec::codecForName(codecName); + if (codec == nullptr) { + return false; + } + + QTextCodec::ConverterState decodeState; + const QString text = codec->toUnicode(rawName.constData(), rawName.size(), &decodeState); + if (decodeState.invalidChars != 0) { + return false; + } + + if (codec->fromUnicode(text) != rawName) { + return false; + } +#endif + + *decodedName = text; + return true; +} + +bool tryDecodeUnicodePathExtraField(const QByteArray &rawName, + const QByteArray &extraField, + QString *decodedName) +{ + if (decodedName == nullptr || extraField.isEmpty()) { + return false; + } + + const auto *extraBytes = reinterpret_cast(extraField.constData()); + int offset = 0; + while (offset + 4 <= 
extraField.size()) { + const quint16 headerId = readLittleEndian16(extraBytes + offset); + const quint16 dataSize = readLittleEndian16(extraBytes + offset + 2); + offset += 4; + + if (offset + dataSize > extraField.size()) { + break; + } + + if (headerId == kUnicodePathExtraFieldId) { + if (dataSize < 5 || extraBytes[offset] != kUnicodePathExtraFieldVersion) { + offset += dataSize; + continue; + } + + const quint32 storedNameCrc = readLittleEndian32(extraBytes + offset + 1); + if (storedNameCrc != rawNameCrc32(rawName)) { + offset += dataSize; + continue; + } + + const QByteArray utf8Name(reinterpret_cast(extraBytes + offset + 5), + dataSize - 5); + return tryDecodeWithCodec(utf8Name, "UTF-8", decodedName); + } + + offset += dataSize; + } + + return false; +} + +class EntryDevice final : public QIODevice +{ +public: + explicit EntryDevice(unzFile archiveHandle, quint64 uncompressedSizeValue) + : archive(archiveHandle) + , uncompressedSize(uncompressedSizeValue) + { + } + + bool isSequential() const override + { + return true; + } + + qint64 size() const override + { + if (uncompressedSize > static_cast(std::numeric_limits::max())) { + return -1; + } + return static_cast(uncompressedSize); + } + + bool reachedEnd() const + { + return endReached; + } + + bool hasReadError() const + { + return readError; + } + +protected: + qint64 readData(char *data, qint64 maxSize) override + { + if (archive == nullptr || maxSize <= 0) { + return 0; + } + + const auto chunkSize = static_cast(std::min( + maxSize, static_cast(std::numeric_limits::max()))); + const int bytesRead = unzReadCurrentFile(archive, data, chunkSize); + if (bytesRead < 0) { + readError = true; + setErrorString(QStringLiteral("Failed to read ZIP entry data")); + return -1; + } + + if (bytesRead == 0) { + endReached = true; + } + + return bytesRead; + } + + qint64 writeData(const char *, qint64) override + { + return -1; + } + +private: + unzFile archive = nullptr; + quint64 uncompressedSize = 0; + bool 
endReached = false; + bool readError = false; +}; + +} + +class ZipReader::Private +{ +public: + explicit Private(QString archivePathValue) + : archivePath(std::move(archivePathValue)) + , fallbackCodecs(defaultFallbackCodecNames()) + { + } + + ~Private() + { + close(); + } + + void setArchivePath(const QString &archivePathValue) + { + if (archivePath == archivePathValue) { + return; + } + + close(); + archivePath = archivePathValue; + } + + QList fallbackFileNameCodecs() const + { + return fallbackCodecs; + } + + void setFallbackFileNameCodecs(const QList &codecNames) + { + const QList normalizedCodecs = normalizeCodecNames(codecNames); + if (fallbackCodecs == normalizedCodecs) { + return; + } + + close(); + fallbackCodecs = normalizedCodecs; + } + + bool open() + { + if (archive != nullptr) { + return true; + } + + if (archivePath.isEmpty()) { + return false; + } + + const QByteArray encodedPath = QFile::encodeName(archivePath); + auto *encodedArchivePath = const_cast(encodedPath.constData()); + archive = unzOpen64(encodedArchivePath); + if (archive == nullptr) { + archive = unzOpen(encodedArchivePath); + } + if (archive == nullptr) { + return false; + } + + entryRecordsCached = false; + entryRecords.clear(); + + const int result = unzGoToFirstFile(archive); + if (result != UNZ_OK && result != UNZ_END_OF_LIST_OF_FILE) { + close(); + return false; + } + + return true; + } + + bool isOpen() const + { + return archive != nullptr; + } + + void close() + { + if (archive != nullptr) { + unzClose(archive); + archive = nullptr; + } + + entryRecordsCached = false; + entryRecords.clear(); + } + + QStringList entryNamesList() + { + if (!rebuildEntryRecords()) { + return {}; + } + + QStringList entryNames; + entryNames.reserve(entryRecords.size()); + for (const EntryRecord &entryRecord : entryRecords) { + entryNames.append(entryRecord.decodedName); + } + return entryNames; + } + + bool readEntry(const QString &entryName, QByteArray *data, + Qt::CaseSensitivity 
caseSensitivity) + { + if (data == nullptr) { + return false; + } + + data->clear(); + return processEntry(entryName, + [data](QIODevice *device) { + const qint64 entrySize = device->size(); + if (entrySize > 0 + && entrySize <= static_cast(std::numeric_limits::max())) { + data->reserve(static_cast(entrySize)); + } + + while (true) { + const QByteArray chunk = device->read(kReadBufferSize); + if (chunk.isEmpty()) { + break; + } + data->append(chunk); + } + return true; + }, caseSensitivity); + } + + bool processEntry(const QString &entryName, const std::function &processor, + Qt::CaseSensitivity caseSensitivity) + { + unz_file_info64 fileInfo {}; + if (!processor || !openEntry(entryName, caseSensitivity, &fileInfo)) { + return false; + } + + EntryDevice device(archive, fileInfo.uncompressed_size); + device.open(QIODevice::ReadOnly); + + const bool processed = processor(&device) && !device.hasReadError(); + + device.close(); + const bool closed = closeCurrentEntry(device.reachedEnd()); + return processed && closed; + } + + QString decodeEntryName(const QByteArray &rawName, + const QByteArray &extraField, + uLong flags) const + { + QString decodedName; + if (tryDecodeUnicodePathExtraField(rawName, extraField, &decodedName)) { + return decodedName; + } + + if ((flags & kUtf8NameFlag) != 0 + && tryDecodeWithCodec(rawName, "UTF-8", &decodedName)) { + return decodedName; + } + + const QByteArray localeCodecName = systemCodecName(); + if (!localeCodecName.isEmpty() && tryDecodeWithCodec(rawName, localeCodecName.constData(), &decodedName)) { + return decodedName; + } + + for (const QByteArray &codecName : fallbackCodecs) { + if (tryDecodeWithCodec(rawName, codecName.constData(), &decodedName)) { + return decodedName; + } + } + + return QString::fromLatin1(rawName.toHex()); + } + + const EntryRecord *findEntryRecord(const QString &entryName, + Qt::CaseSensitivity caseSensitivity) + { + if (!rebuildEntryRecords()) { + return nullptr; + } + + const auto it = 
std::find_if(entryRecords.cbegin(), entryRecords.cend(), + [&entryName, caseSensitivity](const EntryRecord &entryRecord) { + return QString::compare(entryRecord.decodedName, entryName, + caseSensitivity) == 0; + }); + if (it == entryRecords.cend()) { + return nullptr; + } + + return &(*it); + } + + bool openEntry(const QString &entryName, Qt::CaseSensitivity caseSensitivity, unz_file_info64 *fileInfo) + { + if (fileInfo == nullptr || !open()) { + return false; + } + + const EntryRecord *entryRecord = findEntryRecord(entryName, caseSensitivity); + if (entryRecord == nullptr) { + return false; + } + + if (unzGoToFilePos64(archive, &entryRecord->position) != UNZ_OK) { + return false; + } + + if (unzGetCurrentFileInfo64(archive, fileInfo, nullptr, 0, nullptr, 0, nullptr, 0) != UNZ_OK) { + return false; + } + + return unzOpenCurrentFile(archive) == UNZ_OK; + } + + bool closeCurrentEntry(bool requireCompleteRead = true) + { + const int closeResult = unzCloseCurrentFile(archive); + return !requireCompleteRead || closeResult == UNZ_OK; + } + + bool rebuildEntryRecords() + { + if (entryRecordsCached) { + return true; + } + + if (!open()) { + return false; + } + + unz64_file_pos savedPosition {}; + const bool hasSavedPosition = unzGetFilePos64(archive, &savedPosition) == UNZ_OK; + + QList refreshedEntryRecords; + int result = unzGoToFirstFile(archive); + while (result == UNZ_OK) { + EntryRecord entryRecord; + if (!currentEntryRecord(&entryRecord)) { + return false; + } + refreshedEntryRecords.append(entryRecord); + result = unzGoToNextFile(archive); + } + + if (result != UNZ_END_OF_LIST_OF_FILE) { + return false; + } + + if (hasSavedPosition) { + unzGoToFilePos64(archive, &savedPosition); + } else { + unzGoToFirstFile(archive); + } + + entryRecords = std::move(refreshedEntryRecords); + entryRecordsCached = true; + return true; + } + + bool currentEntryRecord(EntryRecord *entryRecord) const + { + if (archive == nullptr || entryRecord == nullptr) { + return false; + } + + 
unz_file_info64 fileInfo {}; + if (unzGetCurrentFileInfo64(archive, &fileInfo, nullptr, 0, nullptr, 0, nullptr, 0) != UNZ_OK) { + return false; + } + + QByteArray rawName(static_cast(fileInfo.size_filename) + 1, '\0'); + QByteArray extraField(static_cast(fileInfo.size_file_extra), '\0'); + if (unzGetCurrentFileInfo64(archive, + nullptr, + rawName.data(), + rawName.size(), + extraField.isEmpty() ? nullptr : extraField.data(), + extraField.size(), + nullptr, + 0) + != UNZ_OK) { + return false; + } + + if (unzGetFilePos64(archive, &entryRecord->position) != UNZ_OK) { + return false; + } + + rawName.chop(1); + entryRecord->rawName = rawName; + entryRecord->decodedName = decodeEntryName(entryRecord->rawName, extraField, fileInfo.flag); + entryRecord->uncompressedSize = fileInfo.uncompressed_size; + entryRecord->flags = fileInfo.flag; + return true; + } + + QString archivePath; + unzFile archive = nullptr; + QList entryRecords; + bool entryRecordsCached = false; + QList fallbackCodecs; +}; + +ZipReader::ZipReader(QString archivePath) + : d(new Private(std::move(archivePath))) +{ +} + +ZipReader::~ZipReader() +{ +} + +void ZipReader::setArchivePath(const QString &archivePath) +{ + d->setArchivePath(archivePath); +} + +QString ZipReader::archivePath() const +{ + return d->archivePath; +} + +QList ZipReader::fallbackFileNameCodecs() const +{ + return d->fallbackFileNameCodecs(); +} + +void ZipReader::setFallbackFileNameCodecs(const QList &codecNames) +{ + d->setFallbackFileNameCodecs(codecNames); +} + +bool ZipReader::open() +{ + return d->open(); +} + +bool ZipReader::isOpen() const +{ + return d->isOpen(); +} + +void ZipReader::close() +{ + d->close(); +} + +QStringList ZipReader::entryNames() +{ + return d->entryNamesList(); +} + +bool ZipReader::readEntry(const QString &entryName, QByteArray *data, + Qt::CaseSensitivity caseSensitivity) +{ + return d->readEntry(entryName, data, caseSensitivity); +} + +bool ZipReader::processEntry(const QString &entryName, + const 
std::function &processor, + Qt::CaseSensitivity caseSensitivity) +{ + return d->processEntry(entryName, processor, caseSensitivity); +} + +} // namespace UkuiFileMetadata diff --git a/src/zip-reader.h b/src/zip-reader.h new file mode 100644 index 0000000000000000000000000000000000000000..b51122efeaddc84e76281ee6b5c7a14e500acee2 --- /dev/null +++ b/src/zip-reader.h @@ -0,0 +1,158 @@ +/* + * + * Copyright (C) 2026, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#ifndef UKUI_FILE_METADATA_ZIP_READER_H +#define UKUI_FILE_METADATA_ZIP_READER_H + +#include + +#include +#include +#include +#include +#include +#include + +#include "ukui-file-metadata_global.h" + +namespace UkuiFileMetadata { + +/** + * Lightweight ZIP archive reader for metadata extractors and external callers. + * + * The reader opens archives lazily. Constructing the object or changing the + * archive path does not open the archive immediately. Callers can use `open()` + * proactively or rely on `entryNames()` / `readEntry()` to open the archive on + * demand. + */ + +class UKUIFILEMETADATA_EXPORT ZipReader +{ +public: + /** + * Create a reader bound to `archivePath`. + * + * The path may be empty. In that case `open()`, `entryNames()`, and + * `readEntry()` will fail until a non-empty path is set. 
+ */ + explicit ZipReader(QString archivePath = {}); + + /** + * Destroy the reader and close the archive if it is still open. + */ + ~ZipReader(); + + /** + * Replace the archive path used by this reader. + * + * If the reader is currently open, the current archive is closed first and + * any cached entry name list is discarded. + */ + void setArchivePath(const QString &archivePath); + + /** + * Return the archive path currently associated with the reader. + */ + QString archivePath() const; + + /** + * Return the fallback codec names used for non-UTF8 ZIP entry names. + * + * `ZipReader` always tries the system codec before this list. The default + * fallback list contains only `CP437`, which is the ZIP specification + * default when no Unicode metadata is present. Use + * `setFallbackFileNameCodecs()` to provide additional legacy codecs when + * the creating system's encoding is known. + */ + QList fallbackFileNameCodecs() const; + + /** + * Replace the fallback codec chain used for non-UTF8 ZIP entry names. + * + * Changing the codec list closes the current archive and clears cached + * entry names so subsequent lookups use the new decoding policy. + */ + void setFallbackFileNameCodecs(const QList &codecNames); + + /** + * Open the archive referenced by `archivePath()`. + * + * Returns `true` if the archive is already open or if opening succeeds. + * Returns `false` when the path is empty, the archive cannot be opened, or + * the archive cannot be positioned on its first entry. + */ + bool open(); + + /** + * Return whether the archive is currently open. + */ + bool isOpen() const; + + /** + * Close the current archive and clear any cached entry names. + * + * Calling `close()` on an already closed reader is safe. + */ + void close(); + + /** + * Return all entry names in the archive. + * + * The list is cached after the first successful scan and rebuilt whenever + * the archive path changes or the reader is closed. 
An empty list is + * returned if the archive cannot be opened or scanned. + */ + QStringList entryNames(); + + /** + * Read the uncompressed contents of `entryName` into `data`. + * + * `data` must be non-null. The archive is opened automatically if needed. + * `caseSensitivity` controls how the entry lookup is performed. On success, + * `data` is replaced with the full entry contents and the method returns + * `true`. On failure, `data` is cleared and the method returns `false`. + */ + bool readEntry(const QString &entryName, QByteArray *data, + Qt::CaseSensitivity caseSensitivity = Qt::CaseSensitive); + + /** + * Process the uncompressed contents of `entryName` through a sequential device. + * + * `processor` must be callable. The archive is opened automatically if needed. + * `caseSensitivity` controls how the entry lookup is performed. The method + * returns `true` only when the entry is found, the callback returns `true`, + * no read error occurs, and the entry closes successfully. Returning + * `false` from the callback is treated as a processing failure even if the + * caller intentionally stops reading early; the entry is still closed + * before returning. 
+ */ + bool processEntry(const QString &entryName, + const std::function &processor, + Qt::CaseSensitivity caseSensitivity = Qt::CaseSensitive); + +private: + Q_DISABLE_COPY(ZipReader) + + class Private; + QScopedPointer d; +}; + +} // namespace UkuiFileMetadata + +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6c853b2725573a53cd461109cab2a86f31005139..66b91f0fb21f1605907c519fb887b60a9f48ecbe 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,3 +8,9 @@ target_link_libraries(dump Qt${QT_VERSION_MAJOR}::Core ukui-file-metadata ) + +configure_file(package-config-test.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/package-config-test.cmake + @ONLY) +add_test(PackageConfigTest + ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/package-config-test.cmake) diff --git a/tests/package-config-test.cmake.in b/tests/package-config-test.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..208f097bd9e16690f9ac3fa59381f757299eb895 --- /dev/null +++ b/tests/package-config-test.cmake.in @@ -0,0 +1,48 @@ +set(test_root "@CMAKE_CURRENT_BINARY_DIR@/package-config-test") +file(REMOVE_RECURSE "${test_root}") +file(MAKE_DIRECTORY "${test_root}") + +set(consumer_source_dir "${test_root}/consumer") +file(MAKE_DIRECTORY "${consumer_source_dir}") + +file(WRITE "${consumer_source_dir}/main.cpp" " + #include + int main() { return 0; } + ") + +file(WRITE "${consumer_source_dir}/CMakeLists.txt" [=[ +cmake_minimum_required(VERSION 3.14) +project(ukui_file_metadata_package_consumer LANGUAGES CXX) + +find_package(ukui-file-metadata CONFIG REQUIRED) + +add_executable(package-consumer main.cpp) +target_link_libraries(package-consumer PRIVATE ukui-file-metadata) +]=]) + +function(run_checked) + execute_process( + COMMAND ${ARGN} + RESULT_VARIABLE result + OUTPUT_VARIABLE stdout + ERROR_VARIABLE stderr + ) + if (NOT result EQUAL 0) + message(FATAL_ERROR "Command failed: ${ARGN}\nstdout:\n${stdout}\nstderr:\n${stderr}") + endif() +endfunction() + 
+set(build_tree_package_dir "@CMAKE_BINARY_DIR@/src") +set(build_tree_consumer_dir "${test_root}/build-tree-consumer") +run_checked("@CMAKE_COMMAND@" -S "${consumer_source_dir}" -B "${build_tree_consumer_dir}" + "-Dukui-file-metadata_DIR=${build_tree_package_dir}") +run_checked("@CMAKE_COMMAND@" --build "${build_tree_consumer_dir}") + +set(install_root "${test_root}/install-root") +run_checked("@CMAKE_COMMAND@" -E env "DESTDIR=${install_root}" "@CMAKE_COMMAND@" --install "@CMAKE_BINARY_DIR@") + +set(installed_package_dir "${install_root}/usr/share/cmake/ukui-file-metadata") +set(installed_consumer_dir "${test_root}/install-tree-consumer") +run_checked("@CMAKE_COMMAND@" -S "${consumer_source_dir}" -B "${installed_consumer_dir}" + "-Dukui-file-metadata_DIR=${installed_package_dir}") +run_checked("@CMAKE_COMMAND@" --build "${installed_consumer_dir}")