<?php

// File: /Applications/MAMP/htdocs/wordpress/wp-content/plugins/gpt3-ai-content-generator/lib/vector-stores/file-upload/pinecone/fn-upload-file-and-upsert.php
// Status: MODIFIED

namespace WPAICG\Lib\VectorStores\FileUpload\Pinecone;

use WP_Error;
use WPAICG\Vector\AIPKit_Vector_Store_Manager;
use WPAICG\Core\AIPKit_AI_Caller;
use WPAICG\Includes\AIPKit_Upload_Utils;
use WPAICG\Lib\Utils\AIPKit_Pdf_Parser;
use WPAICG\Dashboard\Ajax\AIPKit_Vector_Store_Pinecone_Ajax_Handler;

// Refuse direct access: stop immediately unless the ABSPATH constant is defined.
defined('ABSPATH') || exit;

/**
 * Handles the logic for ajax_upload_file_and_upsert_to_pinecone.
 * This file is intended to be conditionally included by the AJAX handler if Pro.
 *
 * Flow: read and validate the request fields, validate the uploaded file
 * (text/plain or application/pdf), extract its text, generate one embedding for
 * the whole text, and upsert a single vector into the target Pinecone index.
 *
 * Reads from the request: $_POST['index_name'], $_POST['embedding_provider'],
 * $_POST['embedding_model'] and $_FILES['file_to_upload'].
 *
 * @param AIPKit_Vector_Store_Manager $vector_store_manager Used to upsert the vector into Pinecone.
 * @param AIPKit_AI_Caller $ai_caller Used to generate the embedding for the file content.
 * @param array $pinecone_config Passed through unchanged to upsert_vectors().
 * @param AIPKit_Vector_Store_Pinecone_Ajax_Handler $handler_instance The AJAX handler instance.
 *        Not referenced inside this function; kept so the signature stays stable for callers.
 * @return array|WP_Error On success an array with 'message', 'result' and 'log_data'.
 *         On failure a WP_Error; errors raised after the file has been validated carry
 *         a 'log_data' entry in their error data for logging by the caller.
 */
function _aipkit_pinecone_ajax_upload_file_and_upsert_logic(
    AIPKit_Vector_Store_Manager $vector_store_manager,
    AIPKit_AI_Caller $ai_caller,
    array $pinecone_config,
    AIPKit_Vector_Store_Pinecone_Ajax_Handler $handler_instance
): array|\WP_Error {
    // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce is checked by the calling handler method.
    $target_index_name = isset($_POST['index_name']) ? sanitize_text_field(wp_unslash($_POST['index_name'])) : '';
    // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce is checked by the calling handler method.
    $embedding_provider_key = isset($_POST['embedding_provider']) ? sanitize_key(wp_unslash($_POST['embedding_provider'])) : '';
    // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce is checked by the calling handler method.
    $embedding_model = isset($_POST['embedding_model']) ? sanitize_text_field(wp_unslash($_POST['embedding_model'])) : '';

    if (empty($target_index_name)) {
        return new WP_Error('missing_target_store_pinecone_upload_lib', __('Target Pinecone index is required.', 'gpt3-ai-content-generator'), ['status' => 400]);
    }
    // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce is checked by the calling handler method.
    if (!isset($_FILES['file_to_upload'])) {
        return new WP_Error('no_file_pinecone_upload_lib', __('No file provided for Pinecone upload.', 'gpt3-ai-content-generator'), ['status' => 400]);
    }
    if (empty($embedding_provider_key) || empty($embedding_model)) {
        return new WP_Error('missing_embedding_config_pinecone_lib', __('Embedding provider and model are required.', 'gpt3-ai-content-generator'), ['status' => 400]);
    }

    if (!class_exists(\WPAICG\Includes\AIPKit_Upload_Utils::class)) {
        return new WP_Error('upload_util_missing_lib', __('Upload utility is missing.', 'gpt3-ai-content-generator'), ['status' => 500]);
    }
    // phpcs:ignore WordPress.Security.NonceVerification.Missing, WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Nonce checked in handler; file data is validated by AIPKit_Upload_Utils::validate_vector_upload_file().
    $file_data = $_FILES['file_to_upload'];
    $upload_limits = \WPAICG\Includes\AIPKit_Upload_Utils::get_effective_upload_limit_summary();

    // Both plain text and PDF uploads are accepted for Pinecone.
    $allowed_mime_types_for_pinecone = ['text/plain', 'application/pdf'];
    $validation_error = \WPAICG\Includes\AIPKit_Upload_Utils::validate_vector_upload_file(
        $file_data,
        $allowed_mime_types_for_pinecone,
        $upload_limits['limit_bytes']
    );
    if (is_wp_error($validation_error)) {
        return $validation_error;
    }

    // The sanitized filename doubles as the log title and as the placeholder
    // "indexed content" when no text could be extracted.
    $file_label = sanitize_file_name($file_data['name']);

    // Single source of truth for the log payload attached to every outcome below.
    // 'file_id' is only included once a vector ID exists.
    $build_log_data = static function (
        string $status,
        string $message,
        string $indexed_content,
        ?string $vector_id = null
    ) use ($target_index_name, $embedding_provider_key, $embedding_model, $file_label): array {
        $log_data = [
            'vector_store_id' => $target_index_name,
            'vector_store_name' => $target_index_name,
            'status' => $status,
            'message' => $message,
            'embedding_provider' => $embedding_provider_key,
            'embedding_model' => $embedding_model,
            'indexed_content' => $indexed_content,
        ];
        if ($vector_id !== null) {
            $log_data['file_id'] = $vector_id;
        }
        $log_data['post_title'] = $file_label;
        $log_data['source_type_for_log'] = 'file_upload_global_form';

        return $log_data;
    };

    // Prefer server-side MIME detection; use the browser-declared type only when
    // detection is unavailable (function missing or temp file unreadable).
    $uploaded_file_mime_type = '';
    if (function_exists('mime_content_type') && is_readable($file_data['tmp_name'])) {
        // mime_content_type() returns false on failure; normalise that to '' so it
        // is rejected as unsupported below.
        $detected_mime_type = mime_content_type($file_data['tmp_name']);
        $uploaded_file_mime_type = is_string($detected_mime_type) ? $detected_mime_type : '';
    } elseif (isset($file_data['type'])) {
        $uploaded_file_mime_type = $file_data['type'];
    }

    // Extract the text to embed according to the detected type.
    $file_content_for_embedding = '';
    if ($uploaded_file_mime_type === 'application/pdf') {
        if (!class_exists(AIPKit_Pdf_Parser::class)) {
            // The parser is loaded on demand from the lib directory.
            $pdf_parser_path = WPAICG_LIB_DIR . 'utils/class-aipkit-pdf-parser.php';
            if (!file_exists($pdf_parser_path)) {
                return new WP_Error('pdf_parser_missing_lib', __('PDF parsing utility is missing (Pinecone).', 'gpt3-ai-content-generator'), ['status' => 500]);
            }
            require_once $pdf_parser_path;
        }
        $pdf_parser = new AIPKit_Pdf_Parser();
        $extracted_text_or_error = $pdf_parser->extract_text($file_data['tmp_name']);
        if (is_wp_error($extracted_text_or_error)) {
            $parse_error_message = $extracted_text_or_error->get_error_message();
            return new WP_Error(
                'pdf_parsing_failed_pinecone_lib',
                'PDF Parsing Failed: ' . $parse_error_message,
                [
                    'status' => 500,
                    // No text is available here, so the filename is logged as a placeholder.
                    'log_data' => $build_log_data('content_error', 'PDF parsing failed: ' . $parse_error_message, $file_label),
                ]
            );
        }
        $file_content_for_embedding = $extracted_text_or_error;
    } elseif ($uploaded_file_mime_type === 'text/plain') {
        $file_content_for_embedding = file_get_contents($file_data['tmp_name']);
    } else {
        /* translators: %s is the MIME type */
        return new WP_Error('unsupported_file_type_pinecone_lib', sprintf(__('Unsupported file type after validation (Pinecone). Given: %s', 'gpt3-ai-content-generator'), $uploaded_file_mime_type), ['status' => 400]);
    }

    // Reject unreadable or blank content. The comparison is against '' rather than
    // empty(), because empty('0') is true and would wrongly reject a file whose
    // only content is "0". Non-string results (e.g. false from file_get_contents)
    // are treated as a read failure.
    if (!is_string($file_content_for_embedding) || trim($file_content_for_embedding) === '') {
        return new WP_Error(
            'file_read_error_pinecone_lib',
            __('Could not read file content or file is empty.', 'gpt3-ai-content-generator'),
            [
                'status' => 500,
                'log_data' => $build_log_data('content_error', 'Could not read file content or file is empty for Pinecone.', $file_label),
            ]
        );
    }

    // Map the sanitized provider key to the provider name passed to the AI caller;
    // unknown keys fall back to OpenAI.
    $provider_map = ['openai' => 'OpenAI', 'google' => 'Google', 'azure' => 'Azure'];
    $embedding_provider_norm = $provider_map[$embedding_provider_key] ?? 'OpenAI';
    $embedding_options = ['model' => $embedding_model];
    $embedding_result = $ai_caller->generate_embeddings($embedding_provider_norm, $file_content_for_embedding, $embedding_options);

    if (is_wp_error($embedding_result) || empty($embedding_result['embeddings'][0])) {
        $error_msg = is_wp_error($embedding_result) ? $embedding_result->get_error_message() : 'No embeddings returned.';
        return new WP_Error(
            'embedding_failed_pinecone_file_lib',
            __('Failed to generate vector for file content.', 'gpt3-ai-content-generator'),
            [
                'log_data' => $build_log_data('failed', 'Embedding failed: ' . $error_msg, $file_content_for_embedding),
            ]
        );
    }

    // The whole file is stored as a single vector. The ID combines a hash of the
    // original filename and content with the current timestamp.
    $vector_values = $embedding_result['embeddings'][0];
    $pinecone_vector_id = 'pinecone_file_' . hash('md5', $file_data['name'] . $file_content_for_embedding) . '_' . time();
    $metadata = [
        'source' => 'file_upload_global_form',
        'filename' => $file_label,
        'uploaded_at' => current_time('mysql', 1),
        'vector_id' => $pinecone_vector_id
    ];
    $vectors_to_upsert = [['id' => $pinecone_vector_id, 'values' => $vector_values, 'metadata' => $metadata]];

    $upsert_result = $vector_store_manager->upsert_vectors('Pinecone', $target_index_name, $vectors_to_upsert, $pinecone_config);

    if (is_wp_error($upsert_result)) {
        $upsert_error_message = 'Upsert to Pinecone failed: ' . $upsert_result->get_error_message();
        return new WP_Error(
            'upsert_failed_pinecone_lib',
            $upsert_error_message,
            [
                'log_data' => $build_log_data('failed', $upsert_error_message, $file_content_for_embedding, $pinecone_vector_id),
            ]
        );
    }

    return [
        'message' => __('File content embedded and upserted to Pinecone successfully.', 'gpt3-ai-content-generator'),
        'result' => $upsert_result,
        'log_data' => $build_log_data(
            'indexed',
            'File content embedded and upserted to Pinecone. Vector ID: ' . $pinecone_vector_id,
            $file_content_for_embedding,
            $pinecone_vector_id
        ),
    ];
}
