Make attention faster for a couple models (#574)

* make attention faster for a couple models

* remove unused generation flags

* add comment on lora

* include text files as well
Awni Hannun
2024-03-14 21:35:54 -07:00
committed by GitHub
parent 3f3741d229
commit e4b19bb9e1
6 changed files with 35 additions and 56 deletions
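The model files that actually receive the attention speedup are among the six changed files but are not shown in this excerpt, which only covers the utils.py hunks. As a rough sketch of what such a change typically looks like in MLX (assumed here, not copied from the diff), a hand-rolled softmax(QK^T)V path can be swapped for the fused `mx.fast.scaled_dot_product_attention` kernel; the function and variable names below are illustrative.

```python
import mlx.core as mx


def attention(queries, keys, values, scale, mask=None):
    # Hand-rolled attention: builds the full (L, L) score matrix as
    # separate matmul / softmax / matmul ops.
    scores = (queries * scale) @ keys.transpose(0, 1, 3, 2)
    if mask is not None:
        scores = scores + mask
    weights = mx.softmax(scores, axis=-1)
    return weights @ values


def fast_attention(queries, keys, values, scale, mask=None):
    # Fused kernel: the same scores/softmax/values pipeline runs as a
    # single MLX fast op instead of several separate ops.
    return mx.fast.scaled_dot_product_attention(
        queries, keys, values, scale=scale, mask=mask
    )
```

Whether these are the exact edits made in the unshown model files is an assumption; the commit message only states that attention was made faster for a couple models.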


@@ -81,6 +81,7 @@ def get_model_path(path_or_hf_repo: str, revision: Optional[str] = None) -> Path
"*.py",
"tokenizer.model",
"*.tiktoken",
"*.txt",
],
)
)
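For context on this hunk: the list being extended is the `allow_patterns` argument passed to `huggingface_hub.snapshot_download` inside `get_model_path`, so the new `"*.txt"` entry simply widens which repo files are downloaded, matching the "include text files as well" note in the commit message. A minimal usage sketch follows; the repo id and the reduced pattern list are purely illustrative.

```python
from huggingface_hub import snapshot_download

# Only files matching these glob patterns are fetched; "*.txt" now also
# pulls plain-text assets such as merges.txt or added_tokens files.
local_dir = snapshot_download(
    repo_id="some-org/some-model",  # illustrative repo id
    allow_patterns=["*.json", "*.safetensors", "*.txt"],
)
print(local_dir)  # path to the locally cached snapshot
```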
@@ -396,7 +397,6 @@ def fetch_from_hub(
 model_path: Path, lazy: bool = False
 ) -> Tuple[nn.Module, dict, PreTrainedTokenizer]:
 model = load_model(model_path, lazy)
-config = AutoConfig.from_pretrained(model_path)
 tokenizer = AutoTokenizer.from_pretrained(model_path)