# Import the class of the accelerated model. Here we use BertForSequenceClassification as the example.
2020from transformers .modeling_bert import BertModel as TorchBertModel
2121from transformers import BertTokenizer
22- from transformers .modeling_bert import BertForSequenceClassification as TorchBertForSequenceClassification
22+ from transformers .modeling_bert import (
23+ BertForSequenceClassification as TorchBertForSequenceClassification ,
24+ )
2325import os
2426import torch
2527from typing import Optional
2628
2729
28- #TODO(jiarufang) developed under v0.1.0, after that not tested.
29- #Contact me if you find it is wrong.
30+ # TODO(jiarufang) developed under v0.1.0, after that not tested.
31+ # Contact me if you find it is wrong.
3032class BertForSequenceClassification : # create a new class for speeding up
3133 def __init__ (
32- self , bertmodel , classifier
34+ self , bertmodel , classifier
3335 ): # the realization of the init function(we can just copy it)
3436 self .bert = bertmodel
3537 self .classifier = classifier
3638
3739 def __call__ (
38- self , # the realization of the call function(we can just copy it)
39- inputs ,
40- attention_masks = None ,
41- token_type_ids = None ,
42- position_ids = None ,
43- pooling_type = PoolingType .FIRST ,
44- return_type = None ):
45- pooler_output , _ , _ = self .bert (inputs ,
46- attention_masks ,
47- token_type_ids ,
48- position_ids ,
49- pooling_type ,
50- return_type = ReturnType .TORCH )
40+ self , # the realization of the call function(we can just copy it)
41+ input_ids ,
42+ attention_mask = None ,
43+ token_type_ids = None ,
44+ position_ids = None ,
45+ pooling_type = PoolingType .FIRST ,
46+ return_type = None ,
47+ ):
48+ bert_outputs = self .bert (
49+ input_ids ,
50+ attention_mask ,
51+ token_type_ids ,
52+ position_ids ,
53+ pooling_type ,
54+ return_type = ReturnType .TORCH ,
55+ )
56+ pooled_output = bert_outputs [1 ]
5157 logits = self .classifier (
52- pooler_output
58+ pooled_output
5359 ) # It's the output of classifier, if User want to output the other type, he can define them after that.
5460 return logits
5561
5662 @staticmethod
5763 def from_torch (
58- model : TorchBertModel , # from_torch函数实现
59- device : Optional [torch .device ] = None ):
60- if device is not None and 'cuda' in device .type and torch .cuda .is_available (
61- ):
64+ model : TorchBertModel , device : Optional [torch .device ] = None # from_torch函数实现
65+ ):
66+ if device is not None and "cuda" in device .type and torch .cuda .is_available ():
6267 model .to (device )
6368 bertmodel = turbo_transformers .BertModel .from_torch (model .bert )
6469 # We can copy the following code and do not change it
@@ -67,11 +72,11 @@ def from_torch(
6772 return BertForSequenceClassification (bertmodel , model .classifier )
6873
6974 @staticmethod
70- def from_pretrained (model_id_or_path : str ,
71- device : Optional [torch .device ] = None ):
75+ def from_pretrained (model_id_or_path : str , device : Optional [torch .device ] = None ):
7276 # First, Use the function of from_pretrained to load the model you trained.
7377 torch_model = TorchBertForSequenceClassification .from_pretrained (
74- model_id_or_path )
78+ model_id_or_path
79+ )
7580 # Then, Use the init function of the acceleration model to get it.
7681 model = BertForSequenceClassification .from_torch (torch_model , device )
7782 model ._torch_model = torch_model # prevent destroy torch model.
@@ -82,18 +87,24 @@ def from_pretrained(model_id_or_path: str,
# Restrict TurboTransformers to 4 intra-op threads.
turbo_transformers.set_num_threads(4)

# Path of the fine-tuned HuggingFace checkpoint, stored next to this script.
model_id = os.path.join(
    os.path.dirname(__file__), "bert_model"
)  # the model of huggingface's path

# Tokenizer and accelerated model are built from the same checkpoint.
tokenizer = BertTokenizer.from_pretrained(model_id)
turbo_model = BertForSequenceClassification.from_pretrained(
    model_id, torch.device("cpu:0")
)  # the initialization of the acceleration model

# Predict after loading the model.

# Encode one sample sentence into model-ready torch tensors.
text = "Sample input text"
inputs = tokenizer.encode_plus(text, add_special_tokens=True, return_tensors="pt")

# turbo_result holds the returned logits from the TurboTransformers model.
turbo_result = turbo_model(**inputs)

# torch_result holds the returned logits from the original Transformers model.
torch_model = TorchBertForSequenceClassification.from_pretrained(model_id)
torch_result = torch_model(**inputs)[0]

print(turbo_result)
# tensor([[0.2716, 0.0318]], grad_fn=<AddmmBackward>)
print(torch_result)  # torch_result and turbo_result should hold the same logits
# tensor([[0.2716, 0.0318]], grad_fn=<AddmmBackward>)