Select file
Updated at:2025-11-03
The SelectObject API lets you run SQL statements against objects stored in BOS in a supported format (CSV, JSON, or Parquet). The object content is screened, analyzed, and filtered by the SQL query, and only the matching content is returned. For details, refer to [Select Object](BOS/API Reference/Object-Related Interface/Select scanning/SelectObject.md).
For usage restrictions, refer to the "Select Object" section under "Using and Managing Data" in the "Developer Guide".
Query CSV files
Example:
Python
import base64

from baidubce import compat

# NOTE: bos_client, bucket_name and key are not defined in this snippet; they
# are assumed to have been created beforehand (client setup is not shown here).

# For easy understanding, we first upload a simple CSV file (no header row).
csv_content = """
1,Maurits,2017-09-12 16:32:57,685856330,-540265154.48,true
2,Iago,2018-02-01 12:25:01,-642946677,3781354659.89,false
3,Dionisio,2018-02-16 09:52:24,-3823711977,79336720.77,false
4,Aleen,2018-05-17 11:48:45,-3289131518,1499686289.41,false
5,Herschel,2019-06-04 02:28:37,3456163349,-3810272511.88,true
"""
bos_client.put_object_from_string(bucket_name, key, csv_content)

# Set parameters for the select_object() API.
# Delimiter, quote and comment characters are given base64-encoded:
#   "Cg==" -> "\n", "LA==" -> ",", "Ig==" -> '"', "Iw==" -> "#"
select_object_args = {
    "expressionType": "SQL",
    "inputSerialization": {
        "compressionType": "NONE",
        "csv": {
            "fileHeaderInfo": "NONE",
            "recordDelimiter": "Cg==",
            "fieldDelimiter": "LA==",
            "quoteCharacter": "Ig==",
            "commentCharacter": "Iw==",
        },
    },
    "outputSerialization": {
        "outputHeader": False,
        "csv": {
            "quoteFields": "ALWAYS",
            "recordDelimiter": "Cg==",
            "fieldDelimiter": "LA==",
            "quoteCharacter": "Ig==",
        },
    },
    "requestProgress": {
        "enabled": True,
    },
}

# Set the SQL statement for the query, which needs to be base64-encoded.
# Columns are addressed by position (_1, _2, ...) because the file has no header.
sql_exp = "SELECT _1, _2, _6 FROM BosObject"
select_object_args["expression"] = compat.convert_to_string(
    base64.standard_b64encode(compat.convert_to_bytes(sql_exp))
)

# Call the Select Object API
select_response = bos_client.select_object(bucket_name, key, select_object_args)

# Obtain the generator of the returned results and print every message.
result = select_response.result()
for msg in result:
    print(msg)
    message_type = msg.headers["message-type"]
    if message_type == "Records":
        # Records messages carry the selected data in msg.payload.
        print("type: {}, headers: {}, payload: {}, crc: {}".format(
            msg.type, msg.headers, msg.payload, msg.crc))
    elif message_type == "Cont":
        # Cont messages report how many bytes were scanned and returned.
        print("type: {}, headers: {}, bytes_scanned: {}, bytes_returned: {}, crc: {}".format(
            msg.type, msg.headers, msg.bytes_scanned, msg.bytes_returned, msg.crc))
    else:
        print("type: {}, headers: {}, crc: {}".format(msg.type, msg.headers, msg.crc))
Query JSON files
Example:
Python
import base64

from baidubce import compat

# NOTE: bos_client, bucket_name and key are not defined in this snippet; they
# are assumed to have been created beforehand (client setup is not shown here).

# For easy understanding, we first upload a simple JSON file
json_content = """
{
    "name": "Smith",
    "age": 16,
    "weight": 65.5,
    "org": null,
    "projects":
    [
        {"project_name":"project1", "completed":false},
        {"project_name":"project2", "completed":true}
    ]
}
"""
bos_client.put_object_from_string(bucket_name, key, json_content)

# Set parameters for the select_object() API.
# The record delimiter is given base64-encoded: "Cg==" -> "\n".
select_object_args = {
    "expressionType": "SQL",
    "inputSerialization": {
        "compressionType": "NONE",
        "json": {
            "type": "DOCUMENT",
        },
    },
    "outputSerialization": {
        "json": {
            "recordDelimiter": "Cg==",
        },
    },
    "requestProgress": {
        "enabled": True,
    },
}

# Set the SQL statement for the query, which needs to be base64-encoded
sql_exp = "select projects from BosObject where name='Smith'"
select_object_args["expression"] = compat.convert_to_string(
    base64.standard_b64encode(compat.convert_to_bytes(sql_exp))
)

# Call the Select Object API
select_response = bos_client.select_object(bucket_name, key, select_object_args)

# Obtain the generator of the returned results and print every message.
result = select_response.result()
for msg in result:
    print(msg)
    message_type = msg.headers["message-type"]
    if message_type == "Records":
        # Records messages carry the selected data in msg.payload.
        print("type: {}, headers: {}, payload: {}, crc: {}".format(
            msg.type, msg.headers, msg.payload, msg.crc))
    elif message_type == "Cont":
        # Cont messages report how many bytes were scanned and returned.
        print("type: {}, headers: {}, bytes_scanned: {}, bytes_returned: {}, crc: {}".format(
            msg.type, msg.headers, msg.bytes_scanned, msg.bytes_returned, msg.crc))
    else:
        print("type: {}, headers: {}, crc: {}".format(msg.type, msg.headers, msg.crc))
Query Parquet files
Python
import base64

from baidubce import compat

# NOTE: bos_client, bucket_name, key and parquet_file_name are not defined in
# this snippet; they are assumed to have been set up beforehand
# (parquet_file_name is presumably the local path of the file to upload).

# We first upload a simple Parquet file.
# Content parsed from the Parquet file:
# {"Name":"StudentName","Age":20,"Id":0,"Weight":50,"Sex":true,"Day":19240,"Scores":{"computer":80,"math":90,"physics":90}}
# {"Name":"StudentName","Age":21,"Id":1,"Weight":50.1,"Sex":false,"Day":19240,"Scores":{"computer":81,"math":91,"physics":91}}
# {"Name":"StudentName","Age":22,"Id":2,"Weight":50.2,"Sex":true,"Day":19240,"Scores":{"computer":82,"math":92,"physics":92}}
# {"Name":"StudentName","Age":23,"Id":3,"Weight":50.3,"Sex":false,"Day":19240,"Scores":{"computer":83,"math":93,"physics":90}}
# {"Name":"StudentName","Age":24,"Id":4,"Weight":50.4,"Sex":true,"Day":19240,"Scores":{"computer":84,"math":94,"physics":91}}
# {"Name":"StudentName","Age":20,"Id":5,"Weight":50.5,"Sex":false,"Day":19240,"Scores":{"computer":85,"math":90,"physics":92}}
# {"Name":"StudentName","Age":21,"Id":6,"Weight":50.6,"Sex":true,"Day":19240,"Scores":{"computer":86,"math":91,"physics":90}}
# {"Name":"StudentName","Age":22,"Id":7,"Weight":50.7,"Sex":false,"Day":19240,"Scores":{"computer":87,"math":92,"physics":91}}
# {"Name":"StudentName","Age":23,"Id":8,"Weight":50.8,"Sex":true,"Day":19240,"Scores":{"computer":88,"math":93,"physics":92}}
# {"Name":"StudentName","Age":24,"Id":9,"Weight":50.9,"Sex":false,"Day":19240,"Scores":{"computer":89,"math":94,"physics":90}}
bos_client.put_object_from_file(bucket_name, key, parquet_file_name)

# Set parameters for the select_object() API.
# The record delimiter is given base64-encoded: "Cg==" -> "\n".
select_object_args = {
    "expressionType": "SQL",
    "inputSerialization": {
        "compressionType": "NONE",
        "parquet": {},
    },
    "outputSerialization": {
        "json": {
            "recordDelimiter": "Cg==",
        },
    },
    "requestProgress": {
        # Must be the Python literal False; lowercase `false` is a NameError.
        "enabled": False,
    },
}

# Set the SQL statement for the query, which needs to be base64-encoded
sql_exp = "select * from BosObject s where s.Scores.computer > 85"
select_object_args["expression"] = compat.convert_to_string(
    base64.standard_b64encode(compat.convert_to_bytes(sql_exp))
)

# Call the Select Object API
select_response = bos_client.select_object(bucket_name, key, select_object_args)

# Obtain the generator of the returned results and print every message.
result = select_response.result()
for msg in result:
    print(msg)
    message_type = msg.headers["message-type"]
    if message_type == "Records":
        # Records messages carry the selected data in msg.payload.
        print("type: {}, headers: {}, payload: {}, crc: {}".format(
            msg.type, msg.headers, msg.payload, msg.crc))
    elif message_type == "Cont":
        # Cont messages report how many bytes were scanned and returned.
        print("type: {}, headers: {}, bytes_scanned: {}, bytes_returned: {}, crc: {}".format(
            msg.type, msg.headers, msg.bytes_scanned, msg.bytes_returned, msg.crc))
    else:
        print("type: {}, headers: {}, crc: {}".format(msg.type, msg.headers, msg.crc))
