Create JSON from given data in Swift

106 Views Asked by At

I want to execute POST URLRequest but first I need to create request body. The body should look like this:

{
  "model": "gpt-4-vision-preview",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": <mutableStringValue>
        },
        {
          "type": "image_url",
          "image_url": {
            "url": <mutableStringValue>
          }
        }
      ]
    }
  ],
  "max_tokens": 300
}

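I cannot hard-code this as a .json file because the values of "text" (in the first content item) and "url" (inside "image_url" in the second content item) are passed in as function parameters. I don't know how to model the two content items, which share a "type" key but otherwise have different fields.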

As of now, I came up with something like this:

/// Request body for a chat-completions call with a fixed model and token limit.
///
/// Only text content is modelled here; each `Content` carries a `type` and an
/// optional `text`.
struct ImageInputRequestBody: Codable {
    /// One part of a message: a type tag plus optional text.
    struct Content: Codable {
        let type: String
        let text: String?
    }

    /// A message: the speaker's role and the parts that make up the message.
    struct Message: Codable {
        let role: String
        let content: [Content]
    }

    /// Maps the Swift property names to the JSON keys (`maxTokens` -> `max_tokens`).
    enum CodingKeys: String, CodingKey {
        case model
        case messages
        case maxTokens = "max_tokens"
    }

    /// Model identifier sent with every request.
    let model: String = "gpt-4-vision-preview"
    /// Messages to send, in order.
    let messages: [Message]
    /// Token limit sent as `max_tokens`.
    let maxTokens: Int = 300
}

But here I'm missing the second "type" entry and its "image_url" content.

3

There are 3 best solutions below

1
workingdog support Ukraine On BEST ANSWER

You could try something simple like this:

/// One element of a message's `content` array: either a text part or an image part.
///
/// The `type` string selects which payload is stored and encoded:
/// `"text"` uses `text`; any other value uses `imageUrl` (encoded as `image_url`).
struct Content: Identifiable, Codable {
    /// Local identity for `Identifiable`. It is not listed in `CodingKeys`,
    /// so it is neither encoded nor decoded.
    let id = UUID()
    let type: String
    var text: String?
    var imageUrl: ImgUrl?

    enum CodingKeys: String, CodingKey {
        case type, text
        case imageUrl = "image_url"
    }

    /// Creates a content part.
    ///
    /// - Parameters:
    ///   - type: `"text"` for a text part; any other value is treated as an image part.
    ///   - value: The text, or the image URL string, depending on `type`
    ///     (the type could be modelled as an enum instead of a string).
    init(type: String, value: String? = nil) {
        self.type = type
        if type == "text" {
            text = value
            imageUrl = nil
        } else {
            text = nil
            // `map` keeps `imageUrl` nil when no value was supplied; no force unwrap needed.
            imageUrl = value.map { ImgUrl(url: $0) }
        }
    }

    /// Encodes `type` plus only the payload that belongs to that type.
    ///
    /// A missing payload is omitted from the output rather than written as
    /// JSON `null`, so the encoded object never contains `"text": null` or
    /// `"image_url": null`.
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)
        try container.encode(type, forKey: .type)
        if type == "text" {
            try container.encodeIfPresent(text, forKey: .text)
        } else {
            try container.encodeIfPresent(imageUrl, forKey: .imageUrl)
        }
    }
}

/// Top-level request body: model name, messages and the `max_tokens` limit.
///
/// `model` and `maxTokens` default to the values used in the question, so
/// `ImageInputRequestBody(messages:)` keeps working, but both can now be
/// overridden and both survive a decode/encode round trip.
struct ImageInputRequestBody: Codable {
    /// Model used when none is specified.
    static let defaultModel = "gpt-4-vision-preview"
    /// Token limit used when none is specified.
    static let defaultMaxTokens = 300

    let model: String
    let messages: [Message]
    let maxTokens: Int

    enum CodingKeys: String, CodingKey {
        case model, messages
        case maxTokens = "max_tokens"
    }

    /// Creates a request body.
    ///
    /// - Parameters:
    ///   - model: Model identifier; defaults to `defaultModel`.
    ///   - messages: Messages to send, in order.
    ///   - maxTokens: Value for `max_tokens`; defaults to `defaultMaxTokens`.
    init(
        model: String = ImageInputRequestBody.defaultModel,
        messages: [Message],
        maxTokens: Int = ImageInputRequestBody.defaultMaxTokens
    ) {
        self.model = model
        self.messages = messages
        self.maxTokens = maxTokens
    }

    /// Decodes a request body, falling back to the defaults when `model` or
    /// `max_tokens` is absent so that input accepted before is still accepted.
    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        model = try container.decodeIfPresent(String.self, forKey: .model)
            ?? ImageInputRequestBody.defaultModel
        messages = try container.decode([Message].self, forKey: .messages)
        maxTokens = try container.decodeIfPresent(Int.self, forKey: .maxTokens)
            ?? ImageInputRequestBody.defaultMaxTokens
    }
}

/// A single chat message.
struct Message: Codable {
    /// Who is speaking; the examples in this answer use "user".
    let role: String

    /// The parts that make up the message, in order.
    let content: [Content]
}

/// Wrapper object for an image reference; encodes as `{"url": ...}`.
struct ImgUrl: Codable {
    /// The image location as a string.
    let url: String

    // let detail: String?  // <--- if desired later
}

Note that you need to add two Content values (or more, for multiple images) to the Message object.

For example:

/// Builds a sample request body holding one "user" message with a text part
/// followed by an image part.
///
/// - Parameters:
///   - text: Value for the text part.
///   - url: Image URL string for the image part.
/// - Returns: A request body wrapping that single message.
func getTestRequestBody(text: String? = nil, url: String? = nil) -> ImageInputRequestBody {
    let textPart = Content(type: "text", value: text)
    let imagePart = Content(type: "image_url", value: url)
    let userMessage = Message(role: "user", content: [textPart, imagePart])
    return ImageInputRequestBody(messages: [userMessage])
}

EDIT-1

Use it like this to post to OpenAI

     // Sample prompt and sample image URL passed to `fetch` below.
     let text = "What's in this image?"
     let imgurl =  "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
     // .... (surrounding code elided)
     // NOTE(review): `.task` is presumably attached to a SwiftUI view in the elided code; confirm.
     .task {
         await fetch(txt: text, urlString: imgurl)
     }
     // .... (rest of the surrounding code elided)
     
     /// Posts a text prompt and an image URL to the chat-completions endpoint
     /// and prints the raw response body.
     ///
     /// Errors from encoding or from the network call are printed, not thrown.
     ///
     /// - Parameters:
     ///   - txt: Prompt text for the "text" content part.
     ///   - urlString: Image URL string for the "image_url" content part.
     private func fetch(txt: String? = nil, urlString: String? = nil) async {
         let apikey = "YOUR-APIKEY"

         guard let url = URL(string: "https://api.openai.com/v1/chat/completions") else { return }

         var request = URLRequest(url: url)
         request.httpMethod = "POST"
         request.addValue("application/json", forHTTPHeaderField: "Content-Type")
         request.addValue("Bearer \(apikey)", forHTTPHeaderField: "Authorization")

         let body = ImageInputRequestBody(messages: [   // <--- here
             Message(role: "user", content: [
                 Content(type: "text", value: txt),
                 Content(type: "image_url", value: urlString)
             ])
         ])

         do {
             let encoded = try JSONEncoder().encode(body)
             request.httpBody = encoded
             // check the encoding
             // print("\n----> encodedString: \n \(String(decoding: encoded, as: UTF8.self)) \n")

             let (data, response) = try await URLSession.shared.data(for: request)

             // A non-2xx status does not throw, so report it explicitly.
             if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                 print("-----> request failed with HTTP status \(http.statusCode)")
             }
             // Decode the bytes directly instead of printing an Optional cast to AnyObject.
             print("-----> \n \(String(decoding: data, as: UTF8.self)) \n")

             // let decoded = try JSONDecoder().decode(OpenAIResponse.self, from: data)
             // .....
         } catch {
             print(error)
         }
     }
     
0
dmc On

As far as I remember, if your task is only to create the JSON shown in the question, you can simply build a multiline string and then convert it into Data.

/// Returns `value` as a quoted JSON string literal, with quotes, backslashes
/// and control characters escaped, so it can be spliced into a JSON template.
func jsonStringLiteral(_ value: String) -> String {
    do {
        return String(decoding: try JSONEncoder().encode(value), as: UTF8.self)
    } catch {
        // Encoding a plain String is not expected to fail; treat it as a programmer error.
        preconditionFailure("Unable to JSON-encode string: \(error)")
    }
}

let value1 = "your text"
let value2 = "your url"
// The interpolated literals already carry their surrounding quotes, so the
// template must not add its own. Raw interpolation would produce invalid JSON
// as soon as a value contains a quote or a backslash.
let text = """
    {
      "model": "gpt-4-vision-preview",
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": \(jsonStringLiteral(value1))
            },
            {
              "type": "image_url",
              "image_url": {
                "url": \(jsonStringLiteral(value2))
              }
            }
          ]
        }
      ],
      "max_tokens": 300
    }
    """
// Converting a Swift String to UTF-8 cannot fail, so no force unwrap is needed.
let encoded = Data(text.utf8)

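Or it can be a raw string literal (#"..."#, where interpolation is written as \#(value)), which avoids escaping the inner quotes and makes the code more compact.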

If such JSONs may have a random number of items in "content", I would make an enum with 2 cases for text and image, and implement Encodable's method to convert them properly. For instance

/// A message content part: either plain text or an image reference.
///
/// Encodes as `{"type": "text", "text": ...}` or
/// `{"type": "image_url", "image_url": {"url": ...}}`.
enum Content: Encodable {
    case text(String)
    case imageURL(URL)

    /// Keys of the outer JSON object.
    private enum CodingKeys: String, CodingKey {
        case type
        case text
        case imageURL = "image_url"
    }

    /// Keys of the nested `image_url` object.
    private enum ImageURLCodingKeys: CodingKey {
        case url
    }

    func encode(to encoder: Encoder) throws {
        var root = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case let .text(body):
            try Content.write(text: body, into: &root)
        case let .imageURL(link):
            try Content.write(imageURL: link, into: &root)
        }
    }

    /// Writes the type tag and the text payload.
    private static func write(
        text body: String,
        into root: inout KeyedEncodingContainer<CodingKeys>
    ) throws {
        try root.encode("text", forKey: .type)
        try root.encode(body, forKey: .text)
    }

    /// Writes the type tag and the nested object holding the URL string.
    private static func write(
        imageURL link: URL,
        into root: inout KeyedEncodingContainer<CodingKeys>
    ) throws {
        try root.encode("image_url", forKey: .type)
        var imageObject = root.nestedContainer(keyedBy: ImageURLCodingKeys.self, forKey: .imageURL)
        try imageObject.encode(link.absoluteString, forKey: .url)
    }
}

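I also wanted to offer a more object-oriented approach, with one type per kind of content. However, JSONEncoder's encode(_:) is a generic method that needs a concrete Encodable type, so it cannot take an [Encodable] array directly; to use the following code as written, you would need a different JSON encoder: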

/// A text content part; encodes as `{"type": "text", "text": ...}`.
struct TextContent {
    /// The text to send.
    let text: String

    /// Keys of the encoded JSON object.
    private enum CodingKeys: String, CodingKey {
        case type
        case text
    }
}

extension TextContent: Encodable {
    /// Writes the fixed type tag followed by the stored text.
    func encode(to encoder: Encoder) throws {
        let typeTag = "text"
        var fields = encoder.container(keyedBy: CodingKeys.self)
        try fields.encode(typeTag, forKey: .type)
        try fields.encode(text, forKey: .text)
    }
}

/// An image content part; encodes as
/// `{"type": "image_url", "image_url": {"url": ...}}`.
struct ImageURLContent {
    /// Location of the image.
    let url: URL

    /// Keys of the outer JSON object.
    private enum CodingKeys: String, CodingKey {
        case type
        case imageURL = "image_url"
    }

    /// Keys of the nested `image_url` object.
    private enum ImageURLCodingKeys: CodingKey {
        case url
    }
}

extension ImageURLContent: Encodable {
    /// Writes the fixed type tag, then a nested object holding the URL string.
    func encode(to encoder: Encoder) throws {
        let typeTag = "image_url"
        var fields = encoder.container(keyedBy: CodingKeys.self)
        try fields.encode(typeTag, forKey: .type)

        var imageObject = fields.nestedContainer(keyedBy: ImageURLCodingKeys.self, forKey: .imageURL)
        try imageObject.encode(url.absoluteString, forKey: .url)
    }
}

// Mixed list of content parts, stored behind the `Encodable` existential type.
let content: [Encodable] = [
    TextContent(text: "your text"),
    ImageURLContent(url: URL(string: "https://test.com")!)
]
// NOTE(review): `SomeThirdPartyEncoder` appears to be a placeholder name, not a type defined here.
// `JSONEncoder.encode(_:)` is generic over a concrete `Encodable` type, and `[Encodable]` does not
// itself conform to `Encodable`, so this array cannot be passed to it directly.
let data = SomeThirdPartyEncoder().encode(content) // JSONEncoder doesn't work with such an array, unfortunately
0
Vader20FF On

This is a modified version of @workingdog-support-ukraine's solution, in case someone wants the full request input for the https://api.openai.com/v1/chat/completions endpoint.

/// Builds a ready-to-send POST request for a prompt plus a base64-encoded image.
struct ImageRequestInput {
    /// The configured request: method, headers and JSON body.
    var request: URLRequest

    /// Creates the request.
    ///
    /// - Parameters:
    ///   - url: Endpoint to post to.
    ///   - apiToken: Token sent as `Authorization: Bearer <token>`.
    ///   - prompt: Text for the "text" content part.
    ///   - imageBase64String: Base64 image data for the image content part.
    ///
    /// If the body cannot be encoded, the request is left without a body.
    /// Debug builds stop on an assertion so the failure is not silently lost.
    init(url: URL, apiToken: String, prompt: String, imageBase64String: String) {
        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.allHTTPHeaderFields = [
            "Content-Type": "application/json",
            "Authorization": "Bearer \(apiToken)"
        ]

        let body = ImageInputRequestBody(messages: [
            OpenAIMessage(role: "user", content: [
                OpenAIMessageContent(type: .text, value: prompt),
                OpenAIMessageContent(type: .imageURL, value: imageBase64String)
            ])
        ])

        do {
            request.httpBody = try JSONEncoder().encode(body)
        } catch {
            // The initializer cannot throw without breaking callers, so surface
            // the problem in debug builds and keep the body-less request in release.
            assertionFailure("Failed to encode chat-completions request body: \(error)")
        }

        self.request = request
    }
}

/// Top-level request body: model name, messages and the `max_tokens` limit.
///
/// `model` and `maxTokens` default to the values used originally, so
/// `ImageInputRequestBody(messages:)` keeps working, but both can now be
/// overridden and both survive a decode/encode round trip.
struct ImageInputRequestBody: Codable {
    /// Model used when none is specified.
    static let defaultModel = "gpt-4-vision-preview"
    /// Token limit used when none is specified.
    static let defaultMaxTokens = 300

    let model: String
    let messages: [OpenAIMessage]
    let maxTokens: Int

    enum CodingKeys: String, CodingKey {
        case model, messages
        case maxTokens = "max_tokens"
    }

    /// Creates a request body.
    ///
    /// - Parameters:
    ///   - model: Model identifier; defaults to `defaultModel`.
    ///   - messages: Messages to send, in order.
    ///   - maxTokens: Value for `max_tokens`; defaults to `defaultMaxTokens`.
    init(
        model: String = ImageInputRequestBody.defaultModel,
        messages: [OpenAIMessage],
        maxTokens: Int = ImageInputRequestBody.defaultMaxTokens
    ) {
        self.model = model
        self.messages = messages
        self.maxTokens = maxTokens
    }

    /// Decodes a request body, falling back to the defaults when `model` or
    /// `max_tokens` is absent so that input accepted before is still accepted.
    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        model = try container.decodeIfPresent(String.self, forKey: .model)
            ?? ImageInputRequestBody.defaultModel
        messages = try container.decode([OpenAIMessage].self, forKey: .messages)
        maxTokens = try container.decodeIfPresent(Int.self, forKey: .maxTokens)
            ?? ImageInputRequestBody.defaultMaxTokens
    }
}

/// A single chat message.
struct OpenAIMessage: Codable {
    /// Who is speaking; the request built in this answer uses "user".
    let role: String

    /// The parts that make up the message, in order.
    let content: [OpenAIMessageContent]
}

/// Type tag of a content part; the raw value is the string written to JSON.
enum OpenAIMessageContentType: String, Codable {
    /// Plain text part. The raw value defaults to the case name, "text".
    case text
    /// Image part, written as "image_url".
    case imageURL = "image_url"
}

/// One element of a message's `content` array: a text part or an image part.
///
/// `type` selects which payload is stored and encoded: `.text` uses `text`,
/// `.imageURL` uses `imageUrl` (encoded under the key `image_url`).
struct OpenAIMessageContent: Identifiable, Codable {
    /// Local identity for `Identifiable`. It is not listed in `CodingKeys`,
    /// so it is neither encoded nor decoded.
    let id = UUID()
    let type: OpenAIMessageContentType
    var text: String?
    var imageUrl: ImgUrl?

    enum CodingKeys: String, CodingKey {
        case type, text
        case imageUrl = "image_url"
    }

    /// Creates a content part.
    ///
    /// - Parameters:
    ///   - type: Kind of part to create.
    ///   - value: For `.text`, the text itself. For `.imageURL`, base64 image
    ///     data, which is wrapped into a `data:image/jpeg;base64,` URL.
    init(type: OpenAIMessageContentType, value: String? = nil) {
        self.type = type
        // Exhaustive switch: adding a new content type becomes a compile error
        // here instead of silently falling into the image branch.
        switch type {
        case .text:
            text = value
        case .imageURL:
            imageUrl = value.map { ImgUrl(url: "data:image/jpeg;base64,\($0)") }
        }
    }

    /// Encodes `type` plus only the payload that belongs to that type.
    ///
    /// A missing payload is omitted from the output rather than written as
    /// JSON `null`.
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)
        try container.encode(type, forKey: .type)
        switch type {
        case .text:
            try container.encodeIfPresent(text, forKey: .text)
        case .imageURL:
            try container.encodeIfPresent(imageUrl, forKey: .imageUrl)
        }
    }
}

/// Wrapper object for an image reference; encodes as `{"url": ...}`.
struct ImgUrl: Codable {
    /// The image location as a string (here, a `data:` URL built from base64 data).
    let url: String
}